source: rtems/cpukit/libblock/src/bdbuf.c @ b5b07cad

Last change on this file since b5b07cad was b5b07cad, checked in by Thomas Doerfler <Thomas.Doerfler@…>, on Oct 29, 2009 at 12:50:01 PM
  • Reordered AVL node fields to save space
  • Fixed printf() formats. New structure for waiters synchronization. Added BDBUF_INVALID_DEV define. New error handling in rtems_bdbuf_init().
  • Release disk in case of an error.
  • Property mode set to 100644
File size: 83.3 KB
/**
 * @file
 *
 * @ingroup rtems_bdbuf
 *
 * Block device buffer management.
 */

/*
 * Disk I/O buffering
 * Buffer management
 *
 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
 *         Victor V. Vengerov <vvv@oktet.ru>
 *         Alexander Kukuta <kam@oktet.ru>
 *
 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
 *    Rewritten to remove score mutex access. Fixes many performance
 *    issues.
 *
 * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp
 */

/**
 * Set to 1 to enable debug tracing.
 */
#define RTEMS_BDBUF_TRACE 0

#if HAVE_CONFIG_H
#include "config.h"
#endif
#include <limits.h>
#include <errno.h>
#include <assert.h>
#include <stdio.h>
#include <string.h>
#include <inttypes.h>

#include <rtems.h>
#include <rtems/error.h>
#include <rtems/malloc.h>

#include "rtems/bdbuf.h"

#define BDBUF_INVALID_DEV ((dev_t) -1)

/*
 * Simpler label for this file.
 */
#define bdbuf_config rtems_bdbuf_configuration

/**
 * Swapout transfer transaction data. This data is passed to a worker thread
 * to handle the write phase of the transfer.
 */
typedef struct rtems_bdbuf_swapout_transfer
{
  rtems_chain_control   bds;         /**< The transfer list of BDs. */
  dev_t                 dev;         /**< The device the transfer is for. */
  rtems_blkdev_request* write_req;   /**< The write request array. */
  uint32_t              bufs_per_bd; /**< Number of buffers per BD. */
} rtems_bdbuf_swapout_transfer;

/**
 * Swapout worker thread. These are available to take processing from the
 * main swapout thread and handle the I/O operation.
 */
typedef struct rtems_bdbuf_swapout_worker
{
  rtems_chain_node             link;     /**< The threads sit on a chain when
                                          * idle. */
  rtems_id                     id;       /**< The id of the task so we can wake
                                          * it. */
  volatile bool                enabled;  /**< The worker is enabled. */
  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
                                          * thread. */
} rtems_bdbuf_swapout_worker;

/**
 * Buffer waiters synchronization.
 */
typedef struct rtems_bdbuf_waiters {
  volatile unsigned count;
  rtems_id sema;
} rtems_bdbuf_waiters;

/**
 * The BD buffer cache.
 */
typedef struct rtems_bdbuf_cache
{
  rtems_id            swapout;           /**< Swapout task ID */
  volatile bool       swapout_enabled;   /**< Swapout is only running if
                                          * enabled. Set to false to kill the
                                          * swap out task. It deletes itself. */
  rtems_chain_control swapout_workers;   /**< The work threads for the swapout
                                          * task. */

  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
                                          * descriptors. */
  void*               buffers;           /**< The buffer's memory. */
  size_t              buffer_min_count;  /**< Number of minimum size buffers
                                          * that fit the buffer memory. */
  size_t              max_bds_per_group; /**< The number of BDs of minimum
                                          * buffer size that fit in a group. */
  uint32_t            flags;             /**< Configuration flags. */

  rtems_id            lock;              /**< The cache lock. It locks all
                                          * cache data, BD and lists. */
  rtems_id            sync_lock;         /**< Sync calls block writes. */
  volatile bool       sync_active;       /**< True if a sync is active. */
  volatile rtems_id   sync_requester;    /**< The sync requester. */
  volatile dev_t      sync_device;       /**< The device to sync, or
                                          * BDBUF_INVALID_DEV if no device
                                          * sync is active. */

  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
                                          * root. There is only one. */
  rtems_chain_control ready;             /**< Free buffers list, read-ahead, or
                                          * resized group buffers. */
  rtems_chain_control lru;               /**< Least recently used list */
  rtems_chain_control modified;          /**< Modified buffers list */
  rtems_chain_control sync;              /**< Buffers to sync list */

  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in ACCESS
                                          * state. */
  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
                                          * state. */
  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
                                          * available. */

  size_t              group_count;       /**< The number of groups. */
  rtems_bdbuf_group*  groups;            /**< The groups. */

  bool                initialised;       /**< Initialised state. */
} rtems_bdbuf_cache;

/**
 * Fatal errors
 */
#define RTEMS_BLKDEV_FATAL_ERROR(n) \
  (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF))
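
/*
 * Example (illustrative, not in the original source): 'B' is 0x42, so
 * RTEMS_BLKDEV_FATAL_ERROR(13), the cache lock failure code defined below,
 * expands to 0x4200000D. The 'B' in the top byte tags the code reported to
 * rtems_fatal_error_occurred() as a block device error.
 */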

#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_1 RTEMS_BLKDEV_FATAL_ERROR(1)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_2 RTEMS_BLKDEV_FATAL_ERROR(2)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_3 RTEMS_BLKDEV_FATAL_ERROR(3)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_4 RTEMS_BLKDEV_FATAL_ERROR(4)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_5 RTEMS_BLKDEV_FATAL_ERROR(5)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_6 RTEMS_BLKDEV_FATAL_ERROR(6)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_7 RTEMS_BLKDEV_FATAL_ERROR(7)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_8 RTEMS_BLKDEV_FATAL_ERROR(8)
#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_9 RTEMS_BLKDEV_FATAL_ERROR(9)
#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT       RTEMS_BLKDEV_FATAL_ERROR(10)
#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK     RTEMS_BLKDEV_FATAL_ERROR(11)
#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK   RTEMS_BLKDEV_FATAL_ERROR(12)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK    RTEMS_BLKDEV_FATAL_ERROR(13)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK  RTEMS_BLKDEV_FATAL_ERROR(14)
#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS   RTEMS_BLKDEV_FATAL_ERROR(15)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2  RTEMS_BLKDEV_FATAL_ERROR(16)
#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST   RTEMS_BLKDEV_FATAL_ERROR(17)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR(18)
#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE    RTEMS_BLKDEV_FATAL_ERROR(19)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE       RTEMS_BLKDEV_FATAL_ERROR(20)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM      RTEMS_BLKDEV_FATAL_ERROR(21)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE  RTEMS_BLKDEV_FATAL_ERROR(22)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START   RTEMS_BLKDEV_FATAL_ERROR(23)
#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE          RTEMS_BLKDEV_FATAL_ERROR(24)
#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS          RTEMS_BLKDEV_FATAL_ERROR(25)

/**
 * The events used in this code. These should be system events rather than
 * application events.
 */
#define RTEMS_BDBUF_TRANSFER_SYNC  RTEMS_EVENT_1
#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2

/**
 * The swap out task stack size. Should be more than enough for most drivers
 * with tracing turned on.
 */
#define SWAPOUT_TASK_STACK_SIZE (8 * 1024)

/**
 * Lock semaphore attributes. This is used for locking type mutexes.
 *
 * @warning Priority inheritance is on.
 */
#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)

/**
 * Waiter semaphore attributes.
 *
 * @warning Do not configure as inherit priority. If a driver is in the driver
 *          initialisation table this locked semaphore will have the IDLE task
 *          as the holder and a blocking task will raise the priority of the
 *          IDLE task which can cause unusual side effects.
 */
#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)

/**
 * Waiter timeout. Set to non-zero to find some info on a waiter that is
 * waiting too long.
 */
#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
#define RTEMS_BDBUF_WAIT_TIMEOUT \
  (TOD_MICROSECONDS_TO_TICKS (20000000))
#endif

/*
 * The swap out task.
 */
static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);

/**
 * The Buffer Descriptor cache.
 */
static rtems_bdbuf_cache bdbuf_cache;

#if RTEMS_BDBUF_TRACE
/**
 * If true output the trace message.
 */
bool rtems_bdbuf_tracer;

/**
 * Return the number of items on the list.
 *
 * @param list The chain control.
 * @return uint32_t The number of items on the list.
 */
uint32_t
rtems_bdbuf_list_count (rtems_chain_control* list)
{
  rtems_chain_node* node = rtems_chain_first (list);
  uint32_t          count = 0;
  while (!rtems_chain_is_tail (list, node))
  {
    count++;
    node = rtems_chain_next (node);
  }
  return count;
}

/**
 * Show the usage for the bdbuf cache.
 */
void
rtems_bdbuf_show_usage (void)
{
  uint32_t group;
  uint32_t total = 0;
  uint32_t val;

  for (group = 0; group < bdbuf_cache.group_count; group++)
    total += bdbuf_cache.groups[group].users;
  printf ("bdbuf:group users=%" PRIu32, total);
  val = rtems_bdbuf_list_count (&bdbuf_cache.ready);
  printf (", ready=%" PRIu32, val);
  total = val;
  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
  printf (", lru=%" PRIu32, val);
  total += val;
  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
  printf (", mod=%" PRIu32, val);
  total += val;
  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
  printf (", sync=%" PRIu32, val);
  total += val;
  printf (", total=%" PRIu32 "\n", total);
}

/**
 * Show the users for a group of a bd.
 *
 * @param where A label to show the context of output.
 * @param bd The bd to show the users of.
 */
void
rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
{
  const char* states[] =
    { "EM", "RA", "CH", "AC", "MD", "AM", "SY", "TR" };

  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
          where,
          bd->block, states[bd->state],
          bd->group - bdbuf_cache.groups,
          bd - bdbuf_cache.bds,
          bd->group->users,
          bd->group->users > 8 ? "<<<<<<<" : "");
}
#else
#define rtems_bdbuf_tracer (0)
#define rtems_bdbuf_show_usage()
#define rtems_bdbuf_show_users(_w, _b)
#endif

/**
 * The default maximum height of 32 allows for AVL trees having between
 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
 * change this compile-time constant as you wish.
 */
#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
#endif

/**
 * Searches for the node with specified dev/block.
 *
 * @param root pointer to the root node of the AVL-Tree
 * @param dev device search key
 * @param block block search key
 * @retval NULL node with the specified dev/block is not found
 * @return pointer to the node with specified dev/block
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
                        dev_t                dev,
                        rtems_blkdev_bnum    block)
{
  rtems_bdbuf_buffer* p = *root;

  while ((p != NULL) && ((p->dev != dev) || (p->block != block)))
  {
    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
    {
      p = p->avl.right;
    }
    else
    {
      p = p->avl.left;
    }
  }

  return p;
}

/**
 * Inserts the specified node to the AVL-Tree.
 *
 * @param root pointer to the root node of the AVL-Tree
 * @param node Pointer to the node to add.
 * @retval 0 The node added successfully
 * @retval -1 An error occurred
 */
static int
rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
                       rtems_bdbuf_buffer*  node)
{
  dev_t             dev = node->dev;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  if (p == NULL)
  {
    *root = node;
    node->avl.left = NULL;
    node->avl.right = NULL;
    node->avl.bal = 0;
    return 0;
  }

  while (p != NULL)
  {
    *buf_prev++ = p;

    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
    {
      p->avl.cache = 1;
      q = p->avl.right;
      if (q == NULL)
      {
        p->avl.right = q = node;
        break;
      }
    }
    else if ((p->dev != dev) || (p->block != block))
    {
      p->avl.cache = -1;
      q = p->avl.left;
      if (q == NULL)
      {
        p->avl.left = q = node;
        break;
      }
    }
    else
    {
      return -1;
    }

    p = q;
  }

  q->avl.left = q->avl.right = NULL;
  q->avl.bal = 0;
  modified = true;
  buf_prev--;

  while (modified)
  {
    if (p->avl.cache == -1)
    {
      switch (p->avl.bal)
      {
        case 1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = -1;
          break;

        case -1:
          p1 = p->avl.left;
          if (p1->avl.bal == -1) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;
            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;
            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    else
    {
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = 1;
          break;

        case 1:
          p1 = p->avl.right;
          if (p1->avl.bal == 1) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;
            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;
            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    q = p;
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;

      if (p->avl.cache == -1)
      {
        p->avl.left = q;
      }
      else
      {
        p->avl.right = q;
      }
    }
    else
    {
      *root = p;
      break;
    }
  }

  return 0;
}


/**
 * Removes the node from the tree.
 *
 * @param root Pointer to pointer to the root node
 * @param node Pointer to the node to remove
 * @retval 0 Item removed
 * @retval -1 No such item found
 */
static int
rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
                       const rtems_bdbuf_buffer* node)
{
  dev_t             dev = node->dev;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  r;
  rtems_bdbuf_buffer*  s;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  memset (buf_stack, 0, sizeof(buf_stack));

  while (p != NULL)
  {
    *buf_prev++ = p;

    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
    {
      p->avl.cache = 1;
      p = p->avl.right;
    }
    else if ((p->dev != dev) || (p->block != block))
    {
      p->avl.cache = -1;
      p = p->avl.left;
    }
    else
    {
      /* node found */
      break;
    }
  }

  if (p == NULL)
  {
    /* there is no such node */
    return -1;
  }

  q = p;

  buf_prev--;
  if (buf_prev > buf_stack)
  {
    p = *(buf_prev - 1);
  }
  else
  {
    p = NULL;
  }

  /* at this moment q is the node to delete, p is q's parent */
  if (q->avl.right == NULL)
  {
    r = q->avl.left;
    if (r != NULL)
    {
      r->avl.bal = 0;
    }
    q = r;
  }
  else
  {
    rtems_bdbuf_buffer **t;

    r = q->avl.right;

    if (r->avl.left == NULL)
    {
      r->avl.left = q->avl.left;
      r->avl.bal = q->avl.bal;
      r->avl.cache = 1;
      *buf_prev++ = q = r;
    }
    else
    {
      t = buf_prev++;
      s = r;

      while (s->avl.left != NULL)
      {
        *buf_prev++ = r = s;
        s = r->avl.left;
        r->avl.cache = -1;
      }

      s->avl.left = q->avl.left;
      r->avl.left = s->avl.right;
      s->avl.right = q->avl.right;
      s->avl.bal = q->avl.bal;
      s->avl.cache = 1;

      *t = q = s;
    }
  }

  if (p != NULL)
  {
    if (p->avl.cache == -1)
    {
      p->avl.left = q;
    }
    else
    {
      p->avl.right = q;
    }
  }
  else
  {
    *root = q;
  }

  modified = true;

  while (modified)
  {
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;
    }
    else
    {
      break;
    }

    if (p->avl.cache == -1)
    {
      /* rebalance left branch */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = 1;
          modified = false;
          break;

        case +1:
          p1 = p->avl.right;

          if (p1->avl.bal >= 0) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;

            if (p1->avl.bal == 0)
            {
              p1->avl.bal = -1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;

            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;

            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }
    else
    {
      /* rebalance right branch */
      switch (p->avl.bal)
      {
        case +1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = -1;
          modified = false;
          break;

        case -1:
          p1 = p->avl.left;

          if (p1->avl.bal <= 0) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            if (p1->avl.bal == 0)
            {
              p1->avl.bal = 1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;

            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;

            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }

    if (buf_prev > buf_stack)
    {
      q = *(buf_prev - 1);

      if (q->avl.cache == -1)
      {
        q->avl.left = p;
      }
      else
      {
        q->avl.right = p;
      }
    }
    else
    {
      *root = p;
      break;
    }

  }

  return 0;
}

/**
 * Change the block number for the block size to the block number for the media
 * block size. We have to use 64bit maths. There is no short cut here.
 *
 * @param block The logical block number in the block size terms.
 * @param block_size The block size.
 * @param media_block_size The block size of the media.
 * @return rtems_blkdev_bnum The media block number.
 */
static rtems_blkdev_bnum
rtems_bdbuf_media_block (rtems_blkdev_bnum block,
                         size_t            block_size,
                         size_t            media_block_size)
{
  return (((uint64_t) block) * block_size) / media_block_size;
}
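
/*
 * Worked example (illustrative, not in the original source): with a user
 * block size of 1024 bytes on media with 512 byte sectors, user block 3
 * starts at byte offset 3072, so the media block is (3 * 1024) / 512 = 6.
 * The 64-bit cast keeps the intermediate product from overflowing on large
 * devices.
 */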

/**
 * Lock the mutex. A single task can nest calls.
 *
 * @param lock The mutex to lock.
 * @param fatal_error_code The error code if the call fails.
 */
static void
rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
{
  rtems_status_code sc = rtems_semaphore_obtain (lock,
                                                 RTEMS_WAIT,
                                                 RTEMS_NO_TIMEOUT);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_fatal_error_occurred (fatal_error_code);
}

/**
 * Unlock the mutex.
 *
 * @param lock The mutex to unlock.
 * @param fatal_error_code The error code if the call fails.
 */
static void
rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
{
  rtems_status_code sc = rtems_semaphore_release (lock);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_fatal_error_occurred (fatal_error_code);
}

/**
 * Lock the cache. A single task can nest calls.
 */
static void
rtems_bdbuf_lock_cache (void)
{
  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK);
}

/**
 * Unlock the cache.
 */
static void
rtems_bdbuf_unlock_cache (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK);
}

/**
 * Lock the cache's sync. A single task can nest calls.
 */
static void
rtems_bdbuf_lock_sync (void)
{
  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
}

/**
 * Unlock the cache's sync lock. Any blocked writers are woken.
 */
static void
rtems_bdbuf_unlock_sync (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
                      RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
}

static rtems_mode
rtems_bdbuf_disable_preemption (void)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  rtems_mode prev_mode = 0;

  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS);

  return prev_mode;
}

static void
rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;

  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST);
}

/**
 * Wait until woken. Semaphores are used so a number of tasks can wait and can
 * be woken at once. Task events would require we maintain a list of tasks to
 * be woken and this would require storage and we do not know the number of
 * tasks that could be waiting.
 *
 * While we have the cache locked we can try and claim the semaphore and
 * therefore know when we release the lock to the cache we will block until the
 * semaphore is released. This may even happen before we get to block.
 *
 * A counter is used to save the release call when no one is waiting.
 *
 * The function assumes the cache is locked on entry and it will be locked on
 * exit.
 *
 * @param waiters The waiter structure holding the semaphore to block on and
 *                the wait counter.
 */
static void
rtems_bdbuf_wait (rtems_bdbuf_waiters* waiters)
{
  rtems_status_code sc;
  rtems_mode        prev_mode;

  /*
   * Indicate we are waiting.
   */
  ++waiters->count;

  /*
   * Disable preemption then unlock the cache and block.  There is no POSIX
   * condition variable in the core API so this is a work around.
   *
   * The issue is a task could preempt after the cache is unlocked because it
   * is blocking or just hits that window, and before this task has blocked on
   * the semaphore. If the preempting task flushes the queue this task will
   * not see the flush and may block forever or until another transaction
   * flushes this semaphore.
   */
  prev_mode = rtems_bdbuf_disable_preemption ();

  /*
   * Unlock the cache, wait, and lock the cache when we return.
   */
  rtems_bdbuf_unlock_cache ();

  sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);

  if (sc == RTEMS_TIMEOUT)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO);

  if (sc != RTEMS_UNSATISFIED)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2);

  rtems_bdbuf_lock_cache ();

  rtems_bdbuf_restore_preemption (prev_mode);

  --waiters->count;
}

/**
 * Wake a blocked resource. The resource has a counter that lets us know if
 * there are any waiters.
 */
static void
rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;

  if (waiters->count > 0)
  {
    sc = rtems_semaphore_flush (waiters->sema);
    if (sc != RTEMS_SUCCESSFUL)
      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE);
  }
}
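
/*
 * Illustrative pairing of the wait/wake helpers (a sketch, not part of the
 * original file): with the cache locked, a task that cannot make progress
 * waits on a waiter structure and rechecks its condition when woken, while
 * the path that changes the condition wakes all waiters:
 *
 *   while (no suitable buffer is available)
 *     rtems_bdbuf_wait (&bdbuf_cache.buffer_waiters);
 *
 *   rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);  [releasing path]
 *
 * The semaphore flush wakes every waiter at once, which is why the wait is
 * always placed inside a loop that rechecks the condition.
 */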

/**
 * Add a buffer descriptor to the modified list. This modified list is treated
 * a little differently to the other lists. To access it you must have the
 * cache locked and this is assumed to be the case on entry to this call.
 *
 * If the cache has a device being sync'ed and the bd is for that device the
 * call must block and wait until the sync is over before adding the bd to the
 * modified list. Once a sync happens for a device no bd's can be added to the
 * modified list. The disk image is forced to be a snapshot at that moment in
 * time.
 *
 * @note Do not lower the group user count as the modified list is a user of
 * the buffer.
 *
 * @param bd The bd to queue to the cache's modified list.
 */
static void
rtems_bdbuf_append_modified (rtems_bdbuf_buffer* bd)
{
  /*
   * If the cache has a device being sync'ed check if this bd is for that
   * device. If it is unlock the cache and block on the sync lock. Once we have
   * the sync lock release it.
   */
  if (bdbuf_cache.sync_active && (bdbuf_cache.sync_device == bd->dev))
  {
    rtems_bdbuf_unlock_cache ();
    /* Wait for the sync lock */
    rtems_bdbuf_lock_sync ();
    rtems_bdbuf_unlock_sync ();
    rtems_bdbuf_lock_cache ();
  }

  bd->state = RTEMS_BDBUF_STATE_MODIFIED;

  rtems_chain_append (&bdbuf_cache.modified, &bd->link);
}

/**
 * Wake the swapper task.
 */
static void
rtems_bdbuf_wake_swapper (void)
{
  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
                                           RTEMS_BDBUF_SWAPOUT_SYNC);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
}

/**
 * Compute the number of BDs per group for a given buffer size.
 *
 * @param size The buffer size. It can be any size and we scale up.
 */
static size_t
rtems_bdbuf_bds_per_group (size_t size)
{
  size_t bufs_per_size;
  size_t bds_per_size;

  if (size > rtems_bdbuf_configuration.buffer_max)
    return 0;

  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;

  for (bds_per_size = 1;
       bds_per_size < bufs_per_size;
       bds_per_size <<= 1)
    ;

  return bdbuf_cache.max_bds_per_group / bds_per_size;
}
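
/*
 * Worked example (illustrative, assuming buffer_min = 512 and
 * buffer_max = 4096, so max_bds_per_group = 8): a request for 2048 byte
 * buffers gives bufs_per_size = 4, which the loop rounds up to
 * bds_per_size = 4, so each group yields 8 / 4 = 2 buffers of this size.
 * Any size above buffer_max returns 0 and the callers treat that as an
 * invalid block size.
 */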

/**
 * Reallocate a group. The BDs currently allocated in the group are removed
 * from the AVL tree and any lists then the new BDs are prepended to the ready
 * list of the cache.
 *
 * @param group The group to reallocate.
 * @param new_bds_per_group The new count of BDs per group.
 */
static void
rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
{
  rtems_bdbuf_buffer* bd;
  size_t              b;
  size_t              bufs_per_bd;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:realloc: %tu: %zu -> %zu\n",
            group - bdbuf_cache.groups, group->bds_per_group,
            new_bds_per_group);

  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;

  for (b = 0, bd = group->bdbuf;
       b < group->bds_per_group;
       b++, bd += bufs_per_bd)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_EMPTY:
        break;
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_READ_AHEAD:
        if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
          rtems_fatal_error_occurred ((bd->state << 16) |
                                      RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_1);
        break;
      default:
        rtems_fatal_error_occurred ((bd->state << 16) |
                                    RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_8);
    }

    rtems_chain_extract (&bd->link);
  }

  group->bds_per_group = new_bds_per_group;
  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;

  for (b = 0, bd = group->bdbuf;
       b < group->bds_per_group;
       b++, bd += bufs_per_bd)
  {
    bd->state = RTEMS_BDBUF_STATE_EMPTY;
    rtems_chain_prepend (&bdbuf_cache.ready, &bd->link);
  }
}

/**
 * Get the next BD from the list. This call assumes the cache is locked.
 *
 * @param bds_per_group The number of BDs per group we need.
 * @param list The list to find the BD on.
 * @return The next BD if found or NULL if none are available.
 */
static rtems_bdbuf_buffer*
rtems_bdbuf_get_next_bd (size_t               bds_per_group,
                         rtems_chain_control* list)
{
  rtems_chain_node* node = rtems_chain_first (list);
  while (!rtems_chain_is_tail (list, node))
  {
    rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;

    if (rtems_bdbuf_tracer)
      printf ("bdbuf:next-bd: %tu (%td:%" PRIu32 ") %zu -> %zu\n",
              bd - bdbuf_cache.bds,
              bd->group - bdbuf_cache.groups, bd->group->users,
              bd->group->bds_per_group, bds_per_group);

    /*
     * If this bd is already part of a group that supports the same number of
     * BDs per group return it. If the bd is part of another group check the
     * number of users and if 0 we can take this group and resize it.
     */
    if (bd->group->bds_per_group == bds_per_group)
    {
      rtems_chain_extract (node);
      return bd;
    }

    if (bd->group->users == 0)
    {
      /*
       * We use the group to locate the start of the BDs for this group.
       */
      rtems_bdbuf_group_realloc (bd->group, bds_per_group);
      bd = (rtems_bdbuf_buffer*) rtems_chain_get (&bdbuf_cache.ready);
      return bd;
    }

    node = rtems_chain_next (node);
  }

  return NULL;
}

/**
 * Initialise the cache.
 *
 * @return rtems_status_code The initialisation status.
 */
rtems_status_code
rtems_bdbuf_init (void)
{
  rtems_bdbuf_group*  group;
  rtems_bdbuf_buffer* bd;
  uint8_t*            buffer;
  size_t              b;
  size_t              cache_alignment;
  rtems_status_code   sc;
  rtems_mode          prev_mode;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:init\n");

  if (rtems_interrupt_is_in_progress())
    return RTEMS_CALLED_FROM_ISR;

  /*
   * Check the configuration table values.
   */
  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
    return RTEMS_INVALID_NUMBER;

  /*
   * We use a special variable to manage the initialisation in case we have
   * competing threads doing this. You may get errors if another thread makes
   * a call and we have not finished initialisation.
   */
  prev_mode = rtems_bdbuf_disable_preemption ();
  if (bdbuf_cache.initialised)
  {
    rtems_bdbuf_restore_preemption (prev_mode);

    return RTEMS_RESOURCE_IN_USE;
  }
  memset(&bdbuf_cache, 0, sizeof(bdbuf_cache));
  bdbuf_cache.initialised = true;
  rtems_bdbuf_restore_preemption (prev_mode);

  /*
   * For unspecified cache alignments we use the CPU alignment.
   */
  cache_alignment = 32; /* FIXME rtems_cache_get_data_line_size() */
  if (cache_alignment <= 0)
    cache_alignment = CPU_ALIGNMENT;

  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;

  rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
  rtems_chain_initialize_empty (&bdbuf_cache.ready);
  rtems_chain_initialize_empty (&bdbuf_cache.lru);
  rtems_chain_initialize_empty (&bdbuf_cache.modified);
  rtems_chain_initialize_empty (&bdbuf_cache.sync);

  /*
   * Create the locks for the cache.
   */
  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
                               &bdbuf_cache.lock);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  rtems_bdbuf_lock_cache ();

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
                               &bdbuf_cache.sync_lock);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.access_waiters.sema);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.transfer_waiters.sema);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'w'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.buffer_waiters.sema);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  /*
   * Compute the various number of elements in the cache.
   */
  bdbuf_cache.buffer_min_count =
    bdbuf_config.size / bdbuf_config.buffer_min;
  bdbuf_cache.max_bds_per_group =
    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
  bdbuf_cache.group_count =
    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;
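
  /*
   * Illustrative sizing (not in the original source): with size = 64 KiB,
   * buffer_min = 512 and buffer_max = 4096 the computations above give
   * buffer_min_count = 128, max_bds_per_group = 8 and group_count = 16.
   */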

  /*
   * Allocate the memory for the buffer descriptors.
   */
  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
                            bdbuf_cache.buffer_min_count);
  if (!bdbuf_cache.bds)
    goto error;

  /*
   * Allocate the memory for the groups.
   */
  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
                               bdbuf_cache.group_count);
  if (!bdbuf_cache.groups)
    goto error;

  /*
   * Allocate memory for buffer memory. The buffer memory will be cache
   * aligned. It is possible to free the memory allocated by rtems_memalign()
   * with free(). Returns 0 if allocated.
   */
  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
                      cache_alignment,
                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
    goto error;

  /*
   * The cache is empty after opening so we need to add all the buffers to it
   * and initialise the groups.
   */
  for (b = 0, group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
       b < bdbuf_cache.buffer_min_count;
       b++, bd++, buffer += bdbuf_config.buffer_min)
  {
    bd->dev        = BDBUF_INVALID_DEV;
    bd->group      = group;
    bd->buffer     = buffer;
    bd->avl.left   = NULL;
    bd->avl.right  = NULL;
    bd->state      = RTEMS_BDBUF_STATE_EMPTY;
    bd->error      = 0;
    bd->waiters    = 0;
    bd->hold_timer = 0;
    bd->references = 0;
    bd->user       = NULL;

    rtems_chain_append (&bdbuf_cache.ready, &bd->link);

    if ((b % bdbuf_cache.max_bds_per_group) ==
        (bdbuf_cache.max_bds_per_group - 1))
      group++;
  }

  for (b = 0,
         group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds;
       b < bdbuf_cache.group_count;
       b++,
         group++,
         bd += bdbuf_cache.max_bds_per_group)
  {
    group->bds_per_group = bdbuf_cache.max_bds_per_group;
    group->users = 0;
    group->bdbuf = bd;
  }

  /*
   * Create and start swapout task. This task will create and manage the worker
   * threads.
   */
  bdbuf_cache.swapout_enabled = true;

  sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'),
                          (bdbuf_config.swapout_priority ?
                           bdbuf_config.swapout_priority :
                           RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
                          SWAPOUT_TASK_STACK_SIZE,
                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
                          &bdbuf_cache.swapout);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_task_start (bdbuf_cache.swapout,
                         rtems_bdbuf_swapout_task,
                         (rtems_task_argument) &bdbuf_cache);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;

error:

  if (bdbuf_cache.swapout != 0)
    rtems_task_delete (bdbuf_cache.swapout);

  free (bdbuf_cache.buffers);
  free (bdbuf_cache.groups);
  free (bdbuf_cache.bds);

  rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema);
  rtems_semaphore_delete (bdbuf_cache.access_waiters.sema);
  rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema);
  rtems_semaphore_delete (bdbuf_cache.sync_lock);

  if (bdbuf_cache.lock != 0)
  {
    rtems_bdbuf_unlock_cache ();
    rtems_semaphore_delete (bdbuf_cache.lock);
  }

  bdbuf_cache.initialised = false;

  return RTEMS_UNSATISFIED;
}

/**
 * Get a buffer for this device and block. This function returns a buffer once
 * placed into the AVL tree. If no buffer is available and it is not a read
 * ahead request and no buffers are waiting to be written to disk wait until a
 * buffer is available. If buffers are waiting to be written to disk and none
 * are available expire the hold timers of the queued buffers and wake the
 * swap out task. If the buffer is for a read ahead transfer return NULL if
 * there are no buffers available or the buffer is already in the cache.
 *
 * The AVL tree of buffers for the cache is searched and if not found obtain a
 * buffer and insert it into the AVL tree. Buffers are first obtained from the
 * ready list until all empty/ready buffers are used. Once all buffers are in
 * use the LRU list is searched for a buffer of the same group size or a group
 * that has no active buffers in use. A buffer taken from the LRU list is
 * removed from the AVL tree and assigned the new block number. The ready or
 * LRU list buffer is initialised to this device and block. If no buffers are
 * available due to the ready and LRU lists being empty a check is made of the
 * modified list. Buffers may be queued waiting for the hold timer to
 * expire. These buffers should be written to disk and returned to the LRU list
 * where they can be used. If buffers are on the modified list the max. write
 * block size of buffers have their hold timers expired and the swap out task
 * woken. The caller then blocks on the waiting semaphore and counter. When
 * buffers return from the upper layers (access) or lower driver (transfer) the
 * blocked caller task is woken and this procedure is repeated. The repeat
 * handles the case of another thread pre-empting us, getting a buffer first
 * and adding it to the AVL tree.
 *
 * A buffer located in the AVL tree means it is already in the cache and may be
 * in use somewhere. The buffer can be either:
 *
 * # Cached. Not being accessed or part of a media transfer.
 * # Access or modified access. Is with an upper layer being accessed.
 * # Transfer. Is with the driver and part of a media transfer.
 *
 * If cached we assign the new state, extract it from any list it may be part
 * of and return it to the user.
 *
 * This function assumes the cache the buffer is being taken from is locked and
 * it will make sure the cache is locked when it returns. The cache will be
 * unlocked if the call could block.
 *
 * Variable sized buffers are handled by groups. A group is the size of the
 * maximum buffer that can be allocated. The group can size in multiples of the
 * minimum buffer size where the multiples are 1, 2, 4, 8, etc. If the buffer
 * is found in the AVL tree the number of BDs in the group is checked and if
 * different the buffer size for the block has changed. The buffer needs to be
 * invalidated.
 *
 * @param dd The disk device. Has the configured block size.
 * @param bds_per_group The number of BDs in a group for this block.
 * @param block Absolute media block number for the device
 * @param read_ahead The get is for a read ahead buffer if true
 * @return The buffer descriptor, or NULL if the request is for a read ahead
 *         buffer that is unavailable or already cached.
 */
static rtems_bdbuf_buffer*
rtems_bdbuf_get_buffer (rtems_disk_device* dd,
                        size_t             bds_per_group,
                        rtems_blkdev_bnum  block,
                        bool               read_ahead)
{
  dev_t               device = dd->dev;
  rtems_bdbuf_buffer* bd;
  bool                available;

  /*
   * Loop until we get a buffer. Under load we could find no buffers are
   * available requiring this task to wait until some become available before
   * proceeding. There is no timeout. If this call would block and the buffer
   * is for a read ahead request, return NULL. The read ahead is nice but not
   * that important.
   *
   * The search procedure is repeated as another thread could have pre-empted
   * us while we waited for a buffer, obtained an empty buffer and loaded the
   * AVL tree with the one we are after. In this case we move down and wait for
   * the buffer to return to the cache.
   */
  do
  {
    /*
     * Search for buffer descriptor for this dev/block key.
     */
    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, device, block);

    /*
     * No buffer in the cache for this block. We need to obtain a buffer and
     * this means take a buffer that is ready to use. If all buffers are in use
     * take the least recently used buffer. If there are none then the cache is
     * empty. All the buffers are either queued to be written to disk or with
     * the user. We cannot do much with the buffers with the user, however with
     * the modified buffers waiting to be written to disk flush the maximum
     * number transferred in a block to disk. After this all that can be done
     * is to wait for a buffer to return to the cache.
     */
    if (!bd)
    {
      /*
       * Assign new buffer descriptor from the ready list if one is present. If
       * the ready queue is empty get the oldest buffer from LRU list. If the
       * LRU list is empty there are no available buffers so check the modified
       * list.
       */
      bd = rtems_bdbuf_get_next_bd (bds_per_group, &bdbuf_cache.ready);

      if (!bd)
      {
        /*
         * No unused or read-ahead buffers.
         *
         * If this is a read ahead buffer just return. No need to place further
         * pressure on the cache by reading something that may be needed when
         * we have data in the cache that was needed and may still be in the
         * future.
         */
        if (read_ahead)
          return NULL;

        /*
         * Check the LRU list.
         */
        bd = rtems_bdbuf_get_next_bd (bds_per_group, &bdbuf_cache.lru);

        if (bd)
        {
          /*
           * Remove the buffer from the AVL tree if the state says it is in the
           * cache or a read ahead buffer. The buffer could be in the empty
           * state as a result of reallocations.
           */
          switch (bd->state)
          {
            case RTEMS_BDBUF_STATE_CACHED:
            case RTEMS_BDBUF_STATE_READ_AHEAD:
              if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
                rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_2);
              break;
            default:
              break;
          }
        }
        else
        {
          /*
           * If there are buffers on the modified list expire the hold timer
           * and wake the swap out task then wait, else just go and wait.
           *
           * The check for an empty list is made so the swapper is only woken
           * when timers are changed.
           */
          if (!rtems_chain_is_empty (&bdbuf_cache.modified))
          {
            rtems_chain_node* node = rtems_chain_first (&bdbuf_cache.modified);
            uint32_t          write_blocks = 0;

            while ((write_blocks < bdbuf_config.max_write_blocks) &&
                   !rtems_chain_is_tail (&bdbuf_cache.modified, node))
            {
              rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
              bd->hold_timer = 0;
              write_blocks++;
              node = rtems_chain_next (node);
            }

            rtems_bdbuf_wake_swapper ();
          }

          /*
           * Wait for a buffer to be returned to the cache. The buffer will be
           * placed on the LRU list.
           */
          rtems_bdbuf_wait (&bdbuf_cache.buffer_waiters);
        }
      }
      else
      {
        /*
         * We have a new buffer for this block.
         */
        if ((bd->state != RTEMS_BDBUF_STATE_EMPTY) &&
            (bd->state != RTEMS_BDBUF_STATE_READ_AHEAD))
          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_3);

        if (bd->state == RTEMS_BDBUF_STATE_READ_AHEAD)
        {
          if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
            rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_4);
        }
      }

      if (bd)
      {
        bd->dev       = device;
        bd->block     = block;
        bd->avl.left  = NULL;
        bd->avl.right = NULL;
        bd->state     = RTEMS_BDBUF_STATE_EMPTY;
        bd->error     = 0;
        bd->waiters   = 0;

        if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_5);

        return bd;
      }
    }
    else
    {
      /*
       * We have the buffer for the block from the cache. Check if the buffer
       * in the cache is the same size as the requested size.
       */
      if (bd->group->bds_per_group != bds_per_group)
      {
        /*
         * Remove the buffer from the AVL tree.
         */
        if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_2);
        bd->state = RTEMS_BDBUF_STATE_EMPTY;
        rtems_chain_extract (&bd->link);
        rtems_chain_prepend (&bdbuf_cache.ready, &bd->link);
        bd = NULL;
      }
    }
  }
  while (!bd);

  /*
   * If the buffer is for read ahead and it exists in the AVL cache or is being
   * accessed or being transferred then return NULL stopping further read ahead
   * requests.
   */
  if (read_ahead)
    return NULL;

  /*
   * Loop waiting for the buffer to enter the cached state. If the buffer is in
   * the access or transfer state then wait until it is not.
   */
  available = false;
  while (!available)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_READ_AHEAD:
        available = true;
        break;

      case RTEMS_BDBUF_STATE_ACCESS:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
        bd->waiters++;
        rtems_bdbuf_wait (&bdbuf_cache.access_waiters);
        bd->waiters--;
        break;

      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
        bd->waiters++;
        rtems_bdbuf_wait (&bdbuf_cache.transfer_waiters);
        bd->waiters--;
        break;

      default:
        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_6);
    }
  }

  /*
   * Buffer is linked to the LRU, modified, or sync lists. Remove it from
   * there.
   */
  rtems_chain_extract (&bd->link);

  return bd;
}
1664
1665rtems_status_code
1666rtems_bdbuf_get (dev_t                device,
1667                 rtems_blkdev_bnum    block,
1668                 rtems_bdbuf_buffer** bdp)
1669{
1670  rtems_disk_device*  dd;
1671  rtems_bdbuf_buffer* bd;
1672  rtems_blkdev_bnum   media_block;
1673  size_t              bds_per_group;
1674
1675  if (!bdbuf_cache.initialised)
1676    return RTEMS_NOT_CONFIGURED;
1677
1678  /*
1679   * Do not hold the cache lock when obtaining the disk table.
1680   */
1681  dd = rtems_disk_obtain (device);
1682  if (!dd)
1683    return RTEMS_INVALID_ID;
1684
1685  /*
1686   * Compute the media block number. Drivers work with media block number not
1687   * the block number a BD may have as this depends on the block size set by
1688   * the user.
1689   */
1690  media_block = rtems_bdbuf_media_block (block,
1691                                         dd->block_size,
1692                                         dd->media_block_size);
1693  if (media_block >= dd->size)
1694  {
1695    rtems_disk_release(dd);
1696    return RTEMS_INVALID_NUMBER;
1697  }
1698
1699  bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);
1700  if (!bds_per_group)
1701  {
1702    rtems_disk_release (dd);
1703    return RTEMS_INVALID_NUMBER;
1704  }
1705
1706  media_block += dd->start;
1707
1708  rtems_bdbuf_lock_cache ();
1709
1710  /*
1711   * Print the block index relative to the physical disk.
1712   */
1713  if (rtems_bdbuf_tracer)
1714    printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
1715            media_block, block, (unsigned) device);
1716
1717  bd = rtems_bdbuf_get_buffer (dd, bds_per_group, media_block, false);
1718
1719  /*
1720   * This could be considered a bug in the caller because you should not be
1721   * getting an already modified buffer but user may have modified a byte in a
1722   * block then decided to seek the start and write the whole block and the
1723   * file system will have no record of this so just gets the block to fill.
1724   */
1725  if (bd->state == RTEMS_BDBUF_STATE_MODIFIED)
1726    bd->state = RTEMS_BDBUF_STATE_ACCESS_MODIFIED;
1727  else
1728  {
1729    bd->state = RTEMS_BDBUF_STATE_ACCESS;
1730    /*
1731     * Indicate a buffer in this group is being used.
1732     */
1733    bd->group->users++;
1734  }
1735 
1736  if (rtems_bdbuf_tracer)
1737  {
1738    rtems_bdbuf_show_users ("get", bd);
1739    rtems_bdbuf_show_usage ();
1740  }
1741
1742  rtems_bdbuf_unlock_cache ();
1743
1744  rtems_disk_release(dd);
1745
1746  *bdp = bd;
1747
1748  return RTEMS_SUCCESSFUL;
1749}
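/*
 * A usage sketch, not part of this file's logic. The device number and the
 * 512 byte block size are assumptions for the example: a file system that
 * will overwrite a whole block can obtain the buffer without a read, fill
 * it, then release it as modified so the swapout task writes it later.
 *
 *   rtems_bdbuf_buffer* bd;
 *   rtems_status_code   sc = rtems_bdbuf_get (dev, 0, &bd);
 *   if (sc == RTEMS_SUCCESSFUL)
 *   {
 *     memset (bd->buffer, 0, 512);            // fill the whole block
 *     sc = rtems_bdbuf_release_modified (bd); // queue it for swapout
 *   }
 */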
1750
1751/**
1752 * Call back handler called by the low level driver when the transfer has
1753 * completed. This function may be invoked from an interrupt handler.
1754 *
1755 * @param arg Arbitrary argument specified in block device request
1756 *            structure (in this case - pointer to the appropriate
1757 *            block device request structure).
1758 * @param status I/O completion status
1759 * @param error errno error code if status != RTEMS_SUCCESSFUL
1760 */
1761static void
1762rtems_bdbuf_read_done (void* arg, rtems_status_code status, int error)
1763{
1764  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
1765
1766  req->error = error;
1767  req->status = status;
1768
1769  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
1770}
1771
1772rtems_status_code
1773rtems_bdbuf_read (dev_t                device,
1774                  rtems_blkdev_bnum    block,
1775                  rtems_bdbuf_buffer** bdp)
1776{
1777  rtems_disk_device*    dd;
1778  rtems_bdbuf_buffer*   bd = NULL;
1779  uint32_t              read_ahead_count;
1780  rtems_blkdev_request* req;
1781  size_t                bds_per_group;
1782  rtems_blkdev_bnum     media_block;
1783  rtems_blkdev_bnum     media_block_count;
1784 
1785  if (!bdbuf_cache.initialised)
1786    return RTEMS_NOT_CONFIGURED;
1787
1788  /*
1789   * @todo This type of request structure is wrong and should be removed.
1790   */
1791#define bdbuf_alloc(size) __builtin_alloca (size)
1792
1793  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
1794                     (sizeof ( rtems_blkdev_sg_buffer) *
1795                      rtems_bdbuf_configuration.max_read_ahead_blocks));
1796
1797  /*
1798   * Do not hold the cache lock when obtaining the disk table.
1799   */
1800  dd = rtems_disk_obtain (device);
1801  if (!dd)
1802    return RTEMS_INVALID_ID;
1803 
1804  /*
1805   * Compute the media block number. Drivers work with the media block
1806   * number, not the block number a BD may have, as the latter depends on the
1807   * block size set by the user.
1808   */
1809  media_block = rtems_bdbuf_media_block (block,
1810                                         dd->block_size,
1811                                         dd->media_block_size);
1812  if (media_block >= dd->size)
1813  {
1814    rtems_disk_release(dd);
1815    return RTEMS_INVALID_NUMBER;
1816  }
1817 
1818  bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);
1819  if (!bds_per_group)
1820  {
1821    rtems_disk_release (dd);
1822    return RTEMS_INVALID_NUMBER;
1823  }
1824 
1825  /*
1826   * Print the block index relative to the physical disk and the user block
1827   * number
1828   */
1829  if (rtems_bdbuf_tracer)
1830    printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
1831            media_block + dd->start, block, (unsigned) device);
1832
1833  /*
1834   * Read the block plus the required number of blocks ahead. The number of
1835   * blocks to read ahead is configured by the user and limited by the size of
1836   * the disk or reaching a read ahead block that is also cached.
1837   *
1838   * Limit the blocks read by the size of the disk.
1839   */
1840  if ((rtems_bdbuf_configuration.max_read_ahead_blocks + media_block) < dd->size)
1841    read_ahead_count = rtems_bdbuf_configuration.max_read_ahead_blocks;
1842  else
1843    read_ahead_count = dd->size - media_block;
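  /*
   * For example, with illustrative values: a max_read_ahead_blocks of 32 on
   * a disk with 1000 media blocks clamps a request at media block 990 to a
   * read ahead count of 10.
   */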
1844
1845  media_block_count = dd->block_size / dd->media_block_size;
1846 
1847  req->bufnum = 0;
1848
1849  rtems_bdbuf_lock_cache ();
1850
1851  while (req->bufnum < read_ahead_count)
1852  {
1853    /*
1854     * Get the buffer for the requested block. If the block is cached then
1855     * return it. If it is not cached transfer the block from the disk media
1856     * into memory.
1857     *
1858     * We need to clean up any buffers allocated and not passed back to the
1859     * caller.
1860     */
1861    bd = rtems_bdbuf_get_buffer (dd, bds_per_group, media_block + dd->start,
1862                                 req->bufnum != 0);
1863
1864    /*
1865     * A read ahead buffer is already in the cache or no buffer is
1866     * available. Read what we can.
1867     */
1868    if (!bd)
1869      break;
1870
1871    /*
1872     * Is the block we are interested in already in the cache?
1873     */
1874    if ((bd->state == RTEMS_BDBUF_STATE_CACHED) ||
1875        (bd->state == RTEMS_BDBUF_STATE_MODIFIED))
1876      break;
1877
1878    bd->state = RTEMS_BDBUF_STATE_TRANSFER;
1879    bd->error = 0;
1880
1881    /*
1882     * The buffer will be passed to the driver so this buffer has a user.
1883     */
1884    bd->group->users++;
1885
1886    if (rtems_bdbuf_tracer)
1887      rtems_bdbuf_show_users ("reading", bd);
1888   
1889    /*
1890     * @todo The use of these req blocks is not a great design. The req is a
1891     *       struct with a single 'bufs' element declared in the req struct
1892     *       and the others added by the outer allocation. This relies on the
1893     *       structs joining as a single array, which assumes the compiler
1894     *       packs the structs. Why not just place them on a list? The BD has
1895     *       a node that can be used.
1896     */
1897    req->bufs[req->bufnum].user   = bd;
1898    req->bufs[req->bufnum].block  = media_block + dd->start;
1899    req->bufs[req->bufnum].length = dd->block_size;
1900    req->bufs[req->bufnum].buffer = bd->buffer;
1901    req->bufnum++;
1902
1903    /*
1904     * Move the media block count by the number of media blocks in the
1905     * disk device's set block size.
1906     */
1907    media_block += media_block_count;
1908  }
1909
1910  /*
1911   * Transfer any requested buffers. If the request count is 0 we have found
1912   * the block in the cache so return it.
1913   */
1914  if (req->bufnum)
1915  {
1916    /*
1917     * Unlock the cache. We have the buffer for the block and it will be in the
1918     * access or transfer state. We may also have a number of read ahead blocks
1919     * if we need to transfer data. At this point any other threads can gain
1920     * access to the cache and if they are after any of the buffers we have
1921     * they will block and be woken when the buffer is returned to the cache.
1922     *
1923     * If a transfer is needed the I/O operation will occur with pre-emption
1924     * enabled and the cache unlocked. This is a change to the previous version
1925     * of the bdbuf code.
1926     */
1927    rtems_event_set out;
1928    int             result;
1929    uint32_t        b;
1930    bool            wake_transfer;
1931
1932    /*
1933     * Flush any events.
1934     */
1935    rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
1936                         RTEMS_EVENT_ALL | RTEMS_NO_WAIT,
1937                         0, &out);
1938                         
1939    rtems_bdbuf_unlock_cache ();
1940
1941    req->req = RTEMS_BLKDEV_REQ_READ;
1942    req->req_done = rtems_bdbuf_read_done;
1943    req->done_arg = req;
1944    req->io_task = rtems_task_self ();
1945    req->status = RTEMS_RESOURCE_IN_USE;
1946    req->error = 0;
1947 
1948    result = dd->ioctl (dd, RTEMS_BLKIO_REQUEST, req);
1949
1950    /*
1951     * Inspection of the DOS FS code shows the result from this function is
1952     * handled and a buffer must be returned.
1953     */
1954    if (result < 0)
1955    {
1956      req->error = errno;
1957      req->status = RTEMS_IO_ERROR;
1958    }
1959    else
1960    {
1961      rtems_status_code sc;
1962     
1963      sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
1964                                RTEMS_EVENT_ALL | RTEMS_WAIT,
1965                                0, &out);
1966
1967      if (sc != RTEMS_SUCCESSFUL)
1968        rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
1969    }
1970
1971    wake_transfer = false;
1972   
1973    rtems_bdbuf_lock_cache ();
1974
1975    for (b = 1; b < req->bufnum; b++)
1976    {
1977      bd = req->bufs[b].user;
1978      if (!bd->error)
1979        bd->error = req->error;
1980      bd->state = RTEMS_BDBUF_STATE_READ_AHEAD;
1981      bd->group->users--;
1982
1983      if (rtems_bdbuf_tracer)
1984        rtems_bdbuf_show_users ("read-ahead", bd);
1985
1986      rtems_chain_prepend (&bdbuf_cache.ready, &bd->link);
1987
1988      /*
1989       * If there is an error remove the BD from the AVL tree as it is invalid,
1990       * then wake any threads that may be waiting. A thread may have been
1991       * waiting for this block and assumed it was in the tree.
1992       */
1993      if (bd->error)
1994      {
1995        bd->state = RTEMS_BDBUF_STATE_EMPTY;
1996        if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1997          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_9);
1998      }
1999
2000      if (bd->waiters)
2001        wake_transfer = true;
2002    }
2003
2004    if (wake_transfer)
2005      rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
2006    else
2007      rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2008   
2009    bd = req->bufs[0].user;
2010
2011    /*
2012     * One less user for the BD we return. The loop above is only for the
2013     * read ahead buffers. We decrement here and increment again below so the
2014     * cases of a cached or modified buffer with no read leave the user
2015     * counts at the correct level.
2016     */
2017    bd->group->users--;
2018
2019    if (rtems_bdbuf_tracer)
2020      rtems_bdbuf_show_users ("read-done", bd);
2021  }
2022
2023  /*
2024   * The data for this block is cached in the buffer.
2025   */
2026  if (bd->state == RTEMS_BDBUF_STATE_MODIFIED)
2027    bd->state = RTEMS_BDBUF_STATE_ACCESS_MODIFIED;
2028  else
2029  {
2030    /*
2031     * The file system is a user of the buffer.
2032     */
2033    bd->group->users++;
2034    bd->state = RTEMS_BDBUF_STATE_ACCESS;
2035  }
2036
2037  if (rtems_bdbuf_tracer)
2038  {
2039    rtems_bdbuf_show_users ("read", bd);
2040    rtems_bdbuf_show_usage ();
2041  }
2042 
2043  rtems_bdbuf_unlock_cache ();
2044  rtems_disk_release (dd);
2045
2046  *bdp = bd;
2047
2048  return RTEMS_SUCCESSFUL;
2049}
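/*
 * A usage sketch, not part of this file's logic. The device number and the
 * byte offset are assumptions for the example: a read-modify-write of one
 * block. The buffer returned holds the current media contents, so single
 * bytes can be changed before the buffer is released as modified.
 *
 *   rtems_bdbuf_buffer* bd;
 *   if (rtems_bdbuf_read (dev, 0, &bd) == RTEMS_SUCCESSFUL)
 *   {
 *     bd->buffer[0] ^= 1;                  // patch a single byte
 *     rtems_bdbuf_release_modified (bd);   // schedule the write back
 *   }
 */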
2050
2051rtems_status_code
2052rtems_bdbuf_release (rtems_bdbuf_buffer* bd)
2053{
2054  if (!bdbuf_cache.initialised)
2055    return RTEMS_NOT_CONFIGURED;
2056
2057  if (bd == NULL)
2058    return RTEMS_INVALID_ADDRESS;
2059
2060  rtems_bdbuf_lock_cache ();
2061
2062  if (rtems_bdbuf_tracer)
2063    printf ("bdbuf:release: %" PRIu32 "\n", bd->block);
2064 
2065  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_MODIFIED)
2066  {
2067    rtems_bdbuf_append_modified (bd);
2068  }
2069  else
2070  {
2071    bd->state = RTEMS_BDBUF_STATE_CACHED;
2072    rtems_chain_append (&bdbuf_cache.lru, &bd->link);
2073
2074    /*
2075     * One less user for the group of bds.
2076     */
2077    bd->group->users--;
2078  }
2079 
2080  if (rtems_bdbuf_tracer)
2081    rtems_bdbuf_show_users ("release", bd);
2082 
2083  /*
2084   * If there are threads waiting to access this buffer wake them, otherwise
2085   * wake any threads waiting for a buffer as this one may now be available.
2086   */
2087  if (bd->waiters)
2088    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
2089  else
2090    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2091 
2092  if (rtems_bdbuf_tracer)
2093    rtems_bdbuf_show_usage ();
2094 
2095  rtems_bdbuf_unlock_cache ();
2096
2097  return RTEMS_SUCCESSFUL;
2098}
2099
2100rtems_status_code
2101rtems_bdbuf_release_modified (rtems_bdbuf_buffer* bd)
2102{
2103  if (!bdbuf_cache.initialised)
2104    return RTEMS_NOT_CONFIGURED;
2105
2106  if (!bd)
2107    return RTEMS_INVALID_ADDRESS;
2108
2109  rtems_bdbuf_lock_cache ();
2110
2111  if (rtems_bdbuf_tracer)
2112    printf ("bdbuf:release modified: %" PRIu32 "\n", bd->block);
2113
2114  bd->hold_timer = rtems_bdbuf_configuration.swap_block_hold;
2115 
2116  if (rtems_bdbuf_tracer)
2117    rtems_bdbuf_show_users ("release-modified", bd);
2118 
2119  rtems_bdbuf_append_modified (bd);
2120
2121  if (bd->waiters)
2122    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
2123 
2124  if (rtems_bdbuf_tracer)
2125    rtems_bdbuf_show_usage ();
2126 
2127  rtems_bdbuf_unlock_cache ();
2128
2129  return RTEMS_SUCCESSFUL;
2130}
2131
2132rtems_status_code
2133rtems_bdbuf_sync (rtems_bdbuf_buffer* bd)
2134{
2135  bool available;
2136
2137  if (!bdbuf_cache.initialised)
2138    return RTEMS_NOT_CONFIGURED;
2139
2140  if (!bd)
2141    return RTEMS_INVALID_ADDRESS;
2142
2143  if (rtems_bdbuf_tracer)
2144    printf ("bdbuf:sync: %" PRIu32 "\n", bd->block);
2145
2146  rtems_bdbuf_lock_cache ();
2147
2148  bd->state = RTEMS_BDBUF_STATE_SYNC;
2149
2150  rtems_chain_append (&bdbuf_cache.sync, &bd->link);
2151
2152  rtems_bdbuf_wake_swapper ();
2153
2154  available = false;
2155  while (!available)
2156  {
2157    switch (bd->state)
2158    {
2159      case RTEMS_BDBUF_STATE_CACHED:
2160      case RTEMS_BDBUF_STATE_READ_AHEAD:
2161      case RTEMS_BDBUF_STATE_MODIFIED:
2162      case RTEMS_BDBUF_STATE_ACCESS:
2163      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2164        available = true;
2165        break;
2166
2167      case RTEMS_BDBUF_STATE_SYNC:
2168      case RTEMS_BDBUF_STATE_TRANSFER:
2169        bd->waiters++;
2170        rtems_bdbuf_wait (&bdbuf_cache.transfer_waiters);
2171        bd->waiters--;
2172        break;
2173
2174      default:
2175        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_7);
2176    }
2177  }
2178
2179  rtems_bdbuf_unlock_cache ();
2180 
2181  return RTEMS_SUCCESSFUL;
2182}
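/*
 * A usage sketch, with the block number and the helper assumed for the
 * example: a file system updating a critical block, such as a super block,
 * can use rtems_bdbuf_sync() instead of rtems_bdbuf_release_modified() to
 * block until the data has been transferred to the media. The buffer is
 * handed back to the cache as part of the sync.
 *
 *   rtems_bdbuf_buffer* bd;
 *   if (rtems_bdbuf_read (dev, super_block, &bd) == RTEMS_SUCCESSFUL)
 *   {
 *     update_super_block (bd->buffer);     // hypothetical helper
 *     rtems_bdbuf_sync (bd);               // blocks until processed
 *   }
 */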
2183
2184rtems_status_code
2185rtems_bdbuf_syncdev (dev_t dev)
2186{
2187  rtems_disk_device*  dd;
2188  rtems_status_code   sc;
2189  rtems_event_set     out;
2190
2191  if (rtems_bdbuf_tracer)
2192    printf ("bdbuf:syncdev: %08x\n", (unsigned) dev);
2193
2194  if (!bdbuf_cache.initialised)
2195    return RTEMS_NOT_CONFIGURED;
2196
2197  /*
2198   * Do not hold the cache lock when obtaining the disk table.
2199   */
2200  dd = rtems_disk_obtain (dev);
2201  if (!dd)
2202    return RTEMS_INVALID_ID;
2203
2204  /*
2205   * Take the sync lock before locking the cache. Once we have the sync lock
2206   * we can lock the cache. If another thread has the sync lock this thread
2207   * blocks until it owns the sync lock, and only then can it lock the cache.
2208   * The sync lock can only be obtained with the cache unlocked.
2209   */
2210 
2211  rtems_bdbuf_lock_sync ();
2212  rtems_bdbuf_lock_cache (); 
2213
2214  /*
2215   * Set the cache to have a sync active for a specific device and let the swap
2216   * out task know the id of the requester to wake when done.
2217   *
2218   * The swap out task will negate the sync active flag when no more buffers
2219   * for the device are held on the "modified for sync" queues.
2220   */
2221  bdbuf_cache.sync_active    = true;
2222  bdbuf_cache.sync_requester = rtems_task_self ();
2223  bdbuf_cache.sync_device    = dev;
2224 
2225  rtems_bdbuf_wake_swapper ();
2226  rtems_bdbuf_unlock_cache ();
2227 
2228  sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
2229                            RTEMS_EVENT_ALL | RTEMS_WAIT,
2230                            0, &out);
2231
2232  if (sc != RTEMS_SUCCESSFUL)
2233    rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2234     
2235  rtems_bdbuf_unlock_sync ();
2236 
2237  return rtems_disk_release (dd);
2238}
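/*
 * A usage sketch (the device number is an assumption for the example):
 * flush every modified buffer for one device, for example before an
 * unmount.
 *
 *   rtems_status_code sc = rtems_bdbuf_syncdev (dev);
 *   if (sc != RTEMS_SUCCESSFUL)
 *     printf ("bdbuf: syncdev failed: %d\n", (int) sc);
 */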
2239
2240/**
2241 * Call back handler called by the low level driver when the transfer has
2242 * completed. This function may be invoked from interrupt handlers.
2243 *
2244 * @param arg Arbitrary argument specified in block device request
2245 *            structure (in this case - pointer to the appropriate
2246 *            block device request structure).
2247 * @param status I/O completion status
2248 * @param error errno error code if status != RTEMS_SUCCESSFUL
2249 */
2250static void
2251rtems_bdbuf_write_done(void *arg, rtems_status_code status, int error)
2252{
2253  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
2254
2255  req->error = error;
2256  req->status = status;
2257
2258  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
2259}
2260
2261/**
2262 * Swapout transfer to the driver. The driver will break this I/O into groups
2263 * of consecutive write requests if multiple consecutive buffers are required
2264 * by the driver.
2265 *
2266 * @param transfer The transfer transaction.
2267 */
2268static void
2269rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2270{
2271  rtems_disk_device* dd;
2272 
2273  if (rtems_bdbuf_tracer)
2274    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dev);
2275
2276  /*
2277   * If there are buffers to transfer to the media, transfer them.
2278   */
2279  if (!rtems_chain_is_empty (&transfer->bds))
2280  {
2281    /*
2282     * Obtain the disk device. The cache's mutex has been released to avoid a
2283     * dead lock.
2284     */
2285    dd = rtems_disk_obtain (transfer->dev);
2286    if (dd)
2287    {
2288      /*
2289       * The last block number used when the driver only supports
2290       * continuous blocks in a single request.
2291       */
2292      uint32_t last_block = 0;
2293
2294      /*
2295       * Number of buffers per bd. This is used to detect the next
2296       * block.
2297       */
2298      uint32_t bufs_per_bd = dd->block_size / bdbuf_config.buffer_min;
2299     
2300      /*
2301       * Take as many buffers as configured and pass to the driver. Note, the
2302       * API to the drivers has an array of buffers and if a chain was passed
2303       * we could have just passed the list. If the driver API is updated it
2304       * should be possible to make this change with little effect in this
2305       * code. The array that is passed is broken in design and should be
2306       * removed. Merging members of a struct into the first member is
2307       * trouble waiting to happen.
2308       */
2309      transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2310      transfer->write_req->error = 0;
2311      transfer->write_req->bufnum = 0;
2312
2313      while (!rtems_chain_is_empty (&transfer->bds))
2314      {
2315        rtems_bdbuf_buffer* bd =
2316          (rtems_bdbuf_buffer*) rtems_chain_get (&transfer->bds);
2317
2318        bool write = false;
2319       
2320        /*
2321         * If the device only accepts sequential buffers, this is not the
2322         * first buffer (the first is always sequential) and the buffer is
2323         * not sequential, then put the buffer back on the transfer chain and
2324         * write the committed buffers.
2325         */
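        /*
         * For example, with illustrative values: a 1024 byte block size and
         * a 512 byte buffer_min give a bufs_per_bd of 2, so after writing
         * block 10 only block 12 continues the sequential run; any other
         * block flushes the committed buffers first.
         */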
2326       
2327        if (rtems_bdbuf_tracer)
2328          printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2329                  bd->block, transfer->write_req->bufnum,
2330                  dd->phys_dev->capabilities &
2331                  RTEMS_BLKDEV_CAP_MULTISECTOR_CONT ? "MULTI" : "SCAT");
2332       
2333        if ((dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) &&
2334            transfer->write_req->bufnum &&
2335            (bd->block != (last_block + bufs_per_bd)))
2336        {
2337          rtems_chain_prepend (&transfer->bds, &bd->link);
2338          write = true;
2339        }
2340        else
2341        {
2342          rtems_blkdev_sg_buffer* buf;
2343          buf = &transfer->write_req->bufs[transfer->write_req->bufnum];
2344          transfer->write_req->bufnum++;
2345          buf->user   = bd;
2346          buf->block  = bd->block;
2347          buf->length = dd->block_size;
2348          buf->buffer = bd->buffer;
2349          last_block  = bd->block;
2350        }
2351
2352        /*
2353         * Perform the transfer if there are no more buffers, or the transfer
2354         * size has reached the configured max. value.
2355         */
2356
2357        if (rtems_chain_is_empty (&transfer->bds) ||
2358            (transfer->write_req->bufnum >= rtems_bdbuf_configuration.max_write_blocks))
2359          write = true;
2360
2361        if (write)
2362        {
2363          int result;
2364          uint32_t b;
2365
2366          if (rtems_bdbuf_tracer)
2367            printf ("bdbuf:swapout write: writing bufnum:%" PRIu32 "\n",
2368                    transfer->write_req->bufnum);
2369
2370          /*
2371           * Perform the transfer. No cache locks, no preemption, only the disk
2372           * device is being held.
2373           */
2374          result = dd->ioctl (dd, RTEMS_BLKIO_REQUEST, transfer->write_req); 
2375          if (result < 0)
2376          {
2377            rtems_bdbuf_lock_cache ();
2378             
2379            for (b = 0; b < transfer->write_req->bufnum; b++)
2380            {
2381              bd = transfer->write_req->bufs[b].user;
2382              bd->state  = RTEMS_BDBUF_STATE_MODIFIED;
2383              bd->error = errno;
2384
2385              /*
2386               * Place back on the cache's modified queue and try again.
2387               *
2388               * @warning Not sure this is the best option but I do not know
2389               *          what else can be done.
2390               */
2391              rtems_chain_append (&bdbuf_cache.modified, &bd->link);
2392            }
2393          }
2394          else
2395          {
2396            rtems_status_code sc = RTEMS_SUCCESSFUL;
2397            rtems_event_set   out;
2398
2399            sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
2400                                      RTEMS_EVENT_ALL | RTEMS_WAIT,
2401                                      0, &out);
2402
2403            if (sc != RTEMS_SUCCESSFUL)
2404              rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2405
2406            rtems_bdbuf_lock_cache ();
2407
2408            for (b = 0; b < transfer->write_req->bufnum; b++)
2409            {
2410              bd = transfer->write_req->bufs[b].user;
2411              bd->state = RTEMS_BDBUF_STATE_CACHED;
2412              bd->error = 0;
2413
2414              /*
2415               * The buffer is now not modified so lower the user count for the group.
2416               */
2417              bd->group->users--;
2418
2419              if (rtems_bdbuf_tracer)
2420                rtems_bdbuf_show_users ("write", bd);
2421
2422              rtems_chain_append (&bdbuf_cache.lru, &bd->link);
2423             
2424              if (bd->waiters)
2425                rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
2426              else
2427                rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2428            }
2429          }
2430
2431          if (rtems_bdbuf_tracer)
2432            rtems_bdbuf_show_usage ();
2433
2434          rtems_bdbuf_unlock_cache ();
2435
2436          transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2437          transfer->write_req->error = 0;
2438          transfer->write_req->bufnum = 0;
2439        }
2440      }
2441         
2442      rtems_disk_release (dd);
2443    }
2444    else
2445    {
2446      /*
2447       * We have buffers but no device. Put the BDs back onto the
2448       * ready queue and exit.
2449       */
2450      /* @todo fixme */
2451    }
2452  }
2453}
2454
2455/**
2456 * Process the modified list of buffers. There is a sync or modified list that
2457 * needs to be handled so we have a common function to do the work.
2458 *
2459 * @param dev The device to handle. If BDBUF_INVALID_DEV no device is
2460 * selected and the device of the first buffer to be written is used.
2461 * @param chain The modified chain to process.
2462 * @param transfer The chain to append the buffers to be written to.
2463 * @param sync_active If true this is a sync operation so expire all timers.
2464 * @param update_timers If true update the timers.
2465 * @param timer_delta If update_timers is true update the timers by this
2466 *                    amount.
2467 */
2468static void
2469rtems_bdbuf_swapout_modified_processing (dev_t*               dev,
2470                                         rtems_chain_control* chain,
2471                                         rtems_chain_control* transfer,
2472                                         bool                 sync_active,
2473                                         bool                 update_timers,
2474                                         uint32_t             timer_delta)
2475{
2476  if (!rtems_chain_is_empty (chain))
2477  {
2478    rtems_chain_node* node = rtems_chain_head (chain);
2479    node = node->next;
2480
2481    while (!rtems_chain_is_tail (chain, node))
2482    {
2483      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2484   
2485      /*
2486       * Check if the buffer's hold timer has reached 0. If a sync is active
2487       * force all the timers to 0.
2488       *
2489       * @note Lots of sync requests will skew this timer. It should be based
2490       *       on TOD to be accurate. Does it matter ?
2491       */
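      /*
       * For example, with illustrative values: a buffer released with a
       * hold timer of 250 ms and a timer_delta of 100 ms survives two
       * passes (150 then 50), and on the third pass the timer expires and
       * the buffer is queued for transfer.
       */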
2492      if (sync_active)
2493        bd->hold_timer = 0;
2494 
2495      if (bd->hold_timer)
2496      {
2497        if (update_timers)
2498        {
2499          if (bd->hold_timer > timer_delta)
2500            bd->hold_timer -= timer_delta;
2501          else
2502            bd->hold_timer = 0;
2503        }
2504
2505        if (bd->hold_timer)
2506        {
2507          node = node->next;
2508          continue;
2509        }
2510      }
2511
2512      /*
2513       * This assumes a dev_t can be set to BDBUF_INVALID_DEV, which is just
2514       * an assumption. We cannot use an empty transfer list as the check
2515       * because a sync device call sets the device to use.
2516       */
2517      if (*dev == BDBUF_INVALID_DEV)
2518        *dev = bd->dev;
2519
2520      if (bd->dev == *dev)
2521      {
2522        rtems_chain_node* next_node = node->next;
2523        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2524   
2525        /*
2526         * The blocks on the transfer list are sorted in block order. This
2527         * means multi-block transfers for drivers that require consecutive
2528         * blocks perform better with sorted blocks and for real disks it may
2529         * help lower head movement.
2530         */
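        /*
         * For example (illustrative): if blocks 5, 9 and 2 expire in that
         * order, the tail-first walk inserts each one so the transfer list
         * ends up ordered 2, 5, 9.
         */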
2531
2532        bd->state = RTEMS_BDBUF_STATE_TRANSFER;
2533
2534        rtems_chain_extract (node);
2535
2536        tnode = tnode->previous;
2537         
2538        while (node && !rtems_chain_is_head (transfer, tnode))
2539        {
2540          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2541
2542          if (bd->block > tbd->block)
2543          {
2544            rtems_chain_insert (tnode, node);
2545            node = NULL;
2546          }
2547          else
2548            tnode = tnode->previous;
2549        }
2550       
2551        if (node)
2552          rtems_chain_prepend (transfer, node);
2553         
2554        node = next_node;
2555      }
2556      else
2557      {
2558        node = node->next;
2559      }
2560    }
2561  }
2562}
2563
2564/**
2565 * Process the cache's modified buffers. Check the sync list first then the
2566 * modified list, extracting the buffers suitable to be written to disk. We
2567 * handle one device at a time. The task level loop repeats this operation while
2568 * there are buffers to be written. If the transfer fails place the buffers
2569 * back on the modified list and try again later. The cache is unlocked while
2570 * the buffers are being written to disk.
2571 *
2572 * @param timer_delta If update_timers is true update the timers by this
2573 *                    amount.
2574 * @param update_timers If true update the timers.
2575 * @param transfer The transfer transaction data.
2576 *
2577 * @retval true Buffers were written to disk so scan again.
2578 * @retval false No buffers were written to disk.
2579 */
2580static bool
2581rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2582                                bool                          update_timers,
2583                                rtems_bdbuf_swapout_transfer* transfer)
2584{
2585  rtems_bdbuf_swapout_worker* worker;
2586  bool                        transfered_buffers = false;
2587
2588  rtems_bdbuf_lock_cache ();
2589
2590  /*
2591   * If a sync is active do not use a worker because the current code does
2592   * not clean up after it. We need to know the buffers have been written
2593   * when syncing in order to release the sync lock, and currently worker
2594   * threads do not report back here. We do not know a worker is the last in
2595   * a sequence of sync writes until after it is running, so we cannot tell
2596   * it to release the lock. The simplest solution is to have the main swap
2597   * out task perform all sync operations.
2598   */
2599  if (bdbuf_cache.sync_active)
2600    worker = NULL;
2601  else
2602  {
2603    worker = (rtems_bdbuf_swapout_worker*)
2604      rtems_chain_get (&bdbuf_cache.swapout_workers);
2605    if (worker)
2606      transfer = &worker->transfer;
2607  }
2608 
2609  rtems_chain_initialize_empty (&transfer->bds);
2610  transfer->dev = BDBUF_INVALID_DEV;
2611 
2612  /*
2613   * When the sync is for a device limit the sync to that device. If the sync
2614   * is for a buffer handle process the devices in the order on the sync
2615   * list. This means the dev is BDBUF_INVALID_DEV.
2616   */
2617  if (bdbuf_cache.sync_active)
2618    transfer->dev = bdbuf_cache.sync_device;
2619 
2620  /*
2621   * If we have any buffers in the sync queue move them to the modified
2622   * list. The first sync buffer will select the device we use.
2623   */
2624  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2625                                           &bdbuf_cache.sync,
2626                                           &transfer->bds,
2627                                           true, false,
2628                                           timer_delta);
2629
2630  /*
2631   * Process the cache's modified list.
2632   */
2633  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2634                                           &bdbuf_cache.modified,
2635                                           &transfer->bds,
2636                                           bdbuf_cache.sync_active,
2637                                           update_timers,
2638                                           timer_delta);
2639
2640  /*
2641   * We have all the buffers that have been modified for this device so the
2642   * cache can be unlocked because the state of each buffer has been set to
2643   * TRANSFER.
2644   */
2645  rtems_bdbuf_unlock_cache ();
2646
2647  /*
2648   * If there are buffers to transfer to the media, transfer them.
2649   */
2650  if (!rtems_chain_is_empty (&transfer->bds))
2651  {
2652    if (worker)
2653    {
2654      rtems_status_code sc = rtems_event_send (worker->id,
2655                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2656      if (sc != RTEMS_SUCCESSFUL)
2657        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
2658    }
2659    else
2660    {
2661      rtems_bdbuf_swapout_write (transfer);
2662    }
2663   
2664    transfered_buffers = true;
2665  }
2666   
2667  if (bdbuf_cache.sync_active && !transfered_buffers)
2668  {
2669    rtems_id sync_requester;
2670    rtems_bdbuf_lock_cache ();
2671    sync_requester = bdbuf_cache.sync_requester;
2672    bdbuf_cache.sync_active = false;
2673    bdbuf_cache.sync_requester = 0;
2674    rtems_bdbuf_unlock_cache ();
2675    if (sync_requester)
2676      rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC);
2677  }
2678 
2679  return transfered_buffers;
2680}
2681
2682/**
2683 * Allocate the write request and initialise it for good measure.
2684 *
2685 * @return rtems_blkdev_request* The allocated write request memory.
2686 */
2687static rtems_blkdev_request*
2688rtems_bdbuf_swapout_writereq_alloc (void)
2689{
2690  /*
2691   * @note chrisj The rtems_blkdev_request and the array at the end are a hack.
2692   * I am disappointed at finding code like this in RTEMS. The request should
2693   * have been a rtems_chain_control. Simple, fast and less storage as the node
2694   * is already part of the buffer structure.
2695   */
2696  rtems_blkdev_request* write_req =
2697    malloc (sizeof (rtems_blkdev_request) +
2698            (rtems_bdbuf_configuration.max_write_blocks *
2699             sizeof (rtems_blkdev_sg_buffer)));
2700
2701  if (!write_req)
2702    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2703
2704  write_req->req = RTEMS_BLKDEV_REQ_WRITE;
2705  write_req->req_done = rtems_bdbuf_write_done;
2706  write_req->done_arg = write_req;
2707  write_req->io_task = rtems_task_self ();
2708
2709  return write_req;
2710}
2711
2712/**
2713 * The swapout worker thread body.
2714 *
2715 * @param arg A pointer to the worker thread's private data.
2716 * @return rtems_task Not used.
2717 */
2718static rtems_task
2719rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2720{
2721  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2722
2723  while (worker->enabled)
2724  {
2725    rtems_event_set   out;
2726    rtems_status_code sc;
2727   
2728    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2729                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2730                              RTEMS_NO_TIMEOUT,
2731                              &out);
2732
2733    if (sc != RTEMS_SUCCESSFUL)
2734      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2735
2736    rtems_bdbuf_swapout_write (&worker->transfer);
2737
2738    rtems_bdbuf_lock_cache ();
2739
2740    rtems_chain_initialize_empty (&worker->transfer.bds);
2741    worker->transfer.dev = BDBUF_INVALID_DEV;
2742
2743    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2744   
2745    rtems_bdbuf_unlock_cache ();
2746  }
2747
2748  free (worker->transfer.write_req);
2749  free (worker);
2750
2751  rtems_task_delete (RTEMS_SELF);
2752}
2753
2754/**
2755 * Open the swapout worker threads.
2756 */
2757static void
2758rtems_bdbuf_swapout_workers_open (void)
2759{
2760  rtems_status_code sc;
2761  size_t            w;
2762 
2763  rtems_bdbuf_lock_cache ();
2764 
2765  for (w = 0; w < rtems_bdbuf_configuration.swapout_workers; w++)
2766  {
2767    rtems_bdbuf_swapout_worker* worker;
2768
2769    worker = malloc (sizeof (rtems_bdbuf_swapout_worker));
2770    if (!worker)
2771      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2772
2773    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2774    worker->enabled = true;
2775    worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2776   
2777    rtems_chain_initialize_empty (&worker->transfer.bds);
2778    worker->transfer.dev = BDBUF_INVALID_DEV;
2779
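    /*
     * Note: with this name template the worker tasks are created as BDoa,
     * BDob, and so on, in creation order.
     */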
2780    sc = rtems_task_create (rtems_build_name('B', 'D', 'o', 'a' + w),
2781                            (rtems_bdbuf_configuration.swapout_priority ?
2782                             rtems_bdbuf_configuration.swapout_priority :
2783                             RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
2784                            SWAPOUT_TASK_STACK_SIZE,
2785                            RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
2786                            RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
2787                            &worker->id);
2788    if (sc != RTEMS_SUCCESSFUL)
2789      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE);
2790
2791    sc = rtems_task_start (worker->id,
2792                           rtems_bdbuf_swapout_worker_task,
2793                           (rtems_task_argument) worker);
2794    if (sc != RTEMS_SUCCESSFUL)
2795      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START);
2796  }
2797 
2798  rtems_bdbuf_unlock_cache ();
2799}
2800
2801/**
2802 * Close the swapout worker threads.
2803 */
2804static void
2805rtems_bdbuf_swapout_workers_close (void)
2806{
2807  rtems_chain_node* node;
2808 
2809  rtems_bdbuf_lock_cache ();
2810 
2811  node = rtems_chain_first (&bdbuf_cache.swapout_workers);
2812  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node))
2813  {
2814    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2815    worker->enabled = false;
2816    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2817    node = rtems_chain_next (node);
2818  }
2819 
2820  rtems_bdbuf_unlock_cache ();
2821}
2822
2823/**
2824 * Body of the task which takes care of flushing modified buffers to the disk.
2825 *
2826 * @param arg A pointer to the global cache data. Use the global variable and
2827 *            not this.
2828 * @return rtems_task Not used.
2829 */
2830static rtems_task
2831rtems_bdbuf_swapout_task (rtems_task_argument arg)
2832{
2833  rtems_bdbuf_swapout_transfer transfer;
2834  uint32_t                     period_in_ticks;
2835  const uint32_t               period_in_msecs = bdbuf_config.swapout_period;
2836  uint32_t                     timer_delta;
2837
2838  transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2839  rtems_chain_initialize_empty (&transfer.bds);
2840  transfer.dev = BDBUF_INVALID_DEV;
2841
2842  /*
2843   * Localise the period.
2844   */
2845  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
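  /*
   * For example, with illustrative values: a swapout_period of 250
   * milliseconds on a system with a 10 millisecond clock tick gives a
   * period_in_ticks of 25.
   */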
2846
2847  /*
2848   * This is temporary. Needs to be changed to use the real time clock.
2849   */
2850  timer_delta = period_in_msecs;
2851
2852  /*
2853   * Create the worker threads.
2854   */
2855  rtems_bdbuf_swapout_workers_open ();
2856 
2857  while (bdbuf_cache.swapout_enabled)
2858  {
2859    rtems_event_set   out;
2860    rtems_status_code sc;
2861
2862    /*
2863     * Only update the timers once in the processing cycle.
2864     */
2865    bool update_timers = true;
2866   
2867    /*
2868     * If we wrote buffers to any disk perform the check again. We only write
2869     * a single device at a time and the cache may have more than one device's
2870     * buffers modified and waiting to be written.
2871     */
2872    bool transfered_buffers;
2873
2874    do
2875    {
2876      transfered_buffers = false;
2877
2878      /*
2879       * Extract all the buffers we find for a specific device. The device is
2880       * the first one we find on a modified list. Process the sync queue of
2881       * buffers first.
2882       */
2883      if (rtems_bdbuf_swapout_processing (timer_delta,
2884                                          update_timers,
2885                                          &transfer))
2886      {
2887        transfered_buffers = true;
2888      }
2889     
2890      /*
2891       * Only update the timers once.
2892       */
2893      update_timers = false;
2894    }
2895    while (transfered_buffers);
2896
2897    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2898                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2899                              period_in_ticks,
2900                              &out);
2901
2902    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2903      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2904  }
2905
2906  rtems_bdbuf_swapout_workers_close ();
2907 
2908  free (transfer.write_req);
2909
2910  rtems_task_delete (RTEMS_SELF);
2911}