source: rtems/cpukit/libblock/src/bdbuf.c @ 5c587596

4.104.11
Last change on this file since 5c587596 was 5c587596, checked in by Thomas Doerfler <Thomas.Doerfler@…>, on Jan 19, 2010 at 9:10:03 AM

libblock API update

  • Property mode set to 100644
File size: 76.4 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
11 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (c) 2009 embedded brains GmbH.
23 *
24 * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp
25 */
26
27/**
28 * Set to 1 to enable debug tracing.
29 */
30#define RTEMS_BDBUF_TRACE 0
31
32#if HAVE_CONFIG_H
33#include "config.h"
34#endif
35#include <limits.h>
36#include <errno.h>
37#include <assert.h>
38#include <stdio.h>
39#include <string.h>
40#include <inttypes.h>
41
42#include <rtems.h>
43#include <rtems/error.h>
44#include <rtems/malloc.h>
45
46#include "rtems/bdbuf.h"
47
48#define BDBUF_INVALID_DEV ((dev_t) -1)
49
50/*
51 * Simpler label for this file.
52 */
53#define bdbuf_config rtems_bdbuf_configuration
54
/**
 * A swapout transfer transaction data. This data is passed to a worker thread
 * to handle the write phase of the transfer.
 */
typedef struct rtems_bdbuf_swapout_transfer
{
  rtems_chain_control   bds;         /**< The transfer list of BDs. */
  dev_t                 dev;         /**< The device the transfer is for. */
  rtems_blkdev_request* write_req;   /**< The write request array. */
  uint32_t              bufs_per_bd; /**< Number of buffers per bd. */
} rtems_bdbuf_swapout_transfer;
66
/**
 * Swapout worker thread. These are available to take processing from the
 * main swapout thread and handle the I/O operation.
 */
typedef struct rtems_bdbuf_swapout_worker
{
  rtems_chain_node             link;     /**< The threads sit on a chain when
                                          * idle. */
  rtems_id                     id;       /**< The id of the task so we can wake
                                          * it. */
  volatile bool                enabled;  /**< The worker is enabled. */
  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
                                          * thread. */
} rtems_bdbuf_swapout_worker;
81
/**
 * Buffer waiters synchronization.
 */
typedef struct rtems_bdbuf_waiters {
  volatile unsigned count; /**< Number of tasks currently waiting; lets the
                            * wake path skip the flush when no one waits. */
  rtems_id sema;           /**< Semaphore the waiters block on; it is only
                            * ever flushed, never released. */
} rtems_bdbuf_waiters;
89
/**
 * The BD buffer cache. There is a single instance of this structure
 * (bdbuf_cache) holding all cache state: the swapout task, the buffer
 * descriptors and their memory, the lookup tree and the buffer lists.
 */
typedef struct rtems_bdbuf_cache
{
  rtems_id            swapout;           /**< Swapout task ID */
  volatile bool       swapout_enabled;   /**< Swapout is only running if
                                          * enabled. Set to false to kill the
                                          * swap out task. It deletes itself. */
  rtems_chain_control swapout_workers;   /**< The work threads for the swapout
                                          * task. */

  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
                                          * descriptors. */
  void*               buffers;           /**< The buffer's memory. */
  size_t              buffer_min_count;  /**< Number of minimum size buffers
                                          * that fit the buffer memory. */
  size_t              max_bds_per_group; /**< The number of BDs of minimum
                                          * buffer size that fit in a group. */
  uint32_t            flags;             /**< Configuration flags. */

  rtems_id            lock;              /**< The cache lock. It locks all
                                          * cache data, BD and lists. */
  rtems_id            sync_lock;         /**< Sync calls block writes. */
  volatile bool       sync_active;       /**< True if a sync is active. */
  volatile rtems_id   sync_requester;    /**< The sync requester. */
  volatile dev_t      sync_device;       /**< The device to sync and
                                          * BDBUF_INVALID_DEV not a device
                                          * sync. */

  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
                                          * root. There is only one. */
  rtems_chain_control lru;               /**< Least recently used list */
  rtems_chain_control modified;          /**< Modified buffers list */
  rtems_chain_control sync;              /**< Buffers to sync list */

  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
                                          * ACCESS_CACHED, ACCESS_MODIFIED or
                                          * ACCESS_EMPTY
                                          * state. */
  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
                                          * state. */
  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
                                          * available. */

  size_t              group_count;       /**< The number of groups. */
  rtems_bdbuf_group*  groups;            /**< The groups. */

  bool                initialised;       /**< Initialised state. */
} rtems_bdbuf_cache;
140
141/**
142 * Fatal errors
143 */
144#define RTEMS_BLKDEV_FATAL_ERROR(n) \
145  (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF))
146
147#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_4       RTEMS_BLKDEV_FATAL_ERROR(2)
148#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_5       RTEMS_BLKDEV_FATAL_ERROR(3)
149#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_6       RTEMS_BLKDEV_FATAL_ERROR(4)
150#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_7       RTEMS_BLKDEV_FATAL_ERROR(5)
151#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_8       RTEMS_BLKDEV_FATAL_ERROR(6)
152#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_9       RTEMS_BLKDEV_FATAL_ERROR(7)
153#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_10      RTEMS_BLKDEV_FATAL_ERROR(8)
154#define RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM       RTEMS_BLKDEV_FATAL_ERROR(9)
155#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT       RTEMS_BLKDEV_FATAL_ERROR(10)
156#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK     RTEMS_BLKDEV_FATAL_ERROR(11)
157#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK   RTEMS_BLKDEV_FATAL_ERROR(12)
158#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK    RTEMS_BLKDEV_FATAL_ERROR(13)
159#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK  RTEMS_BLKDEV_FATAL_ERROR(14)
160#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS   RTEMS_BLKDEV_FATAL_ERROR(15)
161#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2  RTEMS_BLKDEV_FATAL_ERROR(16)
162#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST   RTEMS_BLKDEV_FATAL_ERROR(17)
163#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR(18)
164#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE    RTEMS_BLKDEV_FATAL_ERROR(19)
165#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE       RTEMS_BLKDEV_FATAL_ERROR(20)
166#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM      RTEMS_BLKDEV_FATAL_ERROR(21)
167#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE  RTEMS_BLKDEV_FATAL_ERROR(22)
168#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START   RTEMS_BLKDEV_FATAL_ERROR(23)
169#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE          RTEMS_BLKDEV_FATAL_ERROR(24)
170#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS          RTEMS_BLKDEV_FATAL_ERROR(25)
171#define RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT     RTEMS_BLKDEV_FATAL_ERROR(26)
172#define RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE       RTEMS_BLKDEV_FATAL_ERROR(27)
173#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_0       RTEMS_BLKDEV_FATAL_ERROR(28)
174#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_1       RTEMS_BLKDEV_FATAL_ERROR(29)
175#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_2       RTEMS_BLKDEV_FATAL_ERROR(30)
176#define RTEMS_BLKDEV_FATAL_BDBUF_DISK_REL      RTEMS_BLKDEV_FATAL_ERROR(31)
177
178/**
179 * The events used in this code. These should be system events rather than
180 * application events.
181 */
182#define RTEMS_BDBUF_TRANSFER_SYNC  RTEMS_EVENT_1
183#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
184
185/**
186 * The swap out task size. Should be more than enough for most drivers with
187 * tracing turned on.
188 */
189#define SWAPOUT_TASK_STACK_SIZE (8 * 1024)
190
191/**
192 * Lock semaphore attributes. This is used for locking type mutexes.
193 *
194 * @warning Priority inheritance is on.
195 */
196#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
197  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
198   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
199
200/**
201 * Waiter semaphore attributes.
202 *
203 * @warning Do not configure as inherit priority. If a driver is in the driver
204 *          initialisation table this locked semaphore will have the IDLE task
205 *          as the holder and a blocking task will raise the priority of the
206 *          IDLE task which can cause unusual side effects.
207 */
208#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
209  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
210   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
211
212/**
213 * Waiter timeout. Set to non-zero to find some info on a waiter that is
214 * waiting too long.
215 */
216#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
217#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
218#define RTEMS_BDBUF_WAIT_TIMEOUT \
219  (TOD_MICROSECONDS_TO_TICKS (20000000))
220#endif
221
222/*
223 * The swap out task.
224 */
225static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
226
227/**
228 * The Buffer Descriptor cache.
229 */
230static rtems_bdbuf_cache bdbuf_cache;
231
232#if RTEMS_BDBUF_TRACE
233/**
234 * If true output the trace message.
235 */
236bool rtems_bdbuf_tracer;
237
238/**
239 * Return the number of items on the list.
240 *
241 * @param list The chain control.
242 * @return uint32_t The number of items on the list.
243 */
244uint32_t
245rtems_bdbuf_list_count (rtems_chain_control* list)
246{
247  rtems_chain_node* node = rtems_chain_first (list);
248  uint32_t          count = 0;
249  while (!rtems_chain_is_tail (list, node))
250  {
251    count++;
252    node = rtems_chain_next (node);
253  }
254  return count;
255}
256
257/**
258 * Show the usage for the bdbuf cache.
259 */
260void
261rtems_bdbuf_show_usage (void)
262{
263  uint32_t group;
264  uint32_t total = 0;
265  uint32_t val;
266
267  for (group = 0; group < bdbuf_cache.group_count; group++)
268    total += bdbuf_cache.groups[group].users;
269  printf ("bdbuf:group users=%lu", total);
270  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
271  printf (", lru=%lu", val);
272  total = val;
273  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
274  printf (", mod=%lu", val);
275  total += val;
276  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
277  printf (", sync=%lu", val);
278  total += val;
279  printf (", total=%lu\n", total);
280}
281
/**
 * Show the users for a group of a bd.
 *
 * @param where A label to show the context of output.
 * @param bd The bd to show the users of.
 */
void
rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
{
  /* Two-letter tags indexed by bd->state; the order must match the buffer
   * state enumeration (8 entries assumed — confirm against bdbuf.h). */
  const char* states[] =
    { "EM", "FR", "CH", "AC", "AM", "MD", "SY", "TR" };

  /* Print the block, state tag, group index, BD index and the group user
   * count; the marker flags a suspiciously high user count. */
  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
          where,
          bd->block, states[bd->state],
          bd->group - bdbuf_cache.groups,
          bd - bdbuf_cache.bds,
          bd->group->users,
          bd->group->users > 8 ? "<<<<<<<" : "");
}
302#else
303#define rtems_bdbuf_tracer (0)
304#define rtems_bdbuf_show_usage() ((void) 0)
305#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
306#endif
307
308/**
309 * The default maximum height of 32 allows for AVL trees having between
310 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
311 * change this compile-time constant as you wish.
312 */
313#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
314#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
315#endif
316
317static void
318rtems_bdbuf_fatal (rtems_bdbuf_buf_state state, uint32_t error)
319{
320  rtems_fatal_error_occurred ((((uint32_t) state) << 16) | error);
321}
322
323/**
324 * Searches for the node with specified dev/block.
325 *
326 * @param root pointer to the root node of the AVL-Tree
327 * @param dev device search key
328 * @param block block search key
329 * @retval NULL node with the specified dev/block is not found
330 * @return pointer to the node with specified dev/block
331 */
332static rtems_bdbuf_buffer *
333rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
334                        dev_t                dev,
335                        rtems_blkdev_bnum    block)
336{
337  rtems_bdbuf_buffer* p = *root;
338
339  while ((p != NULL) && ((p->dev != dev) || (p->block != block)))
340  {
341    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
342    {
343      p = p->avl.right;
344    }
345    else
346    {
347      p = p->avl.left;
348    }
349  }
350
351  return p;
352}
353
/**
 * Inserts the specified node to the AVL-Tree.
 *
 * The tree is ordered by the (dev, block) pair. A manual stack of visited
 * nodes (buf_stack) is used instead of parent pointers; each visited node's
 * avl.cache field records which child was taken (1 = right, -1 = left) so
 * the retreat phase can relink rotated subtrees.
 *
 * @param root pointer to the root node of the AVL-Tree
 * @param node Pointer to the node to add.
 * @retval 0 The node added successfully
 * @retval -1 An error occurred (a node with the same dev/block exists)
 */
static int
rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
                       rtems_bdbuf_buffer*  node)
{
  dev_t             dev = node->dev;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  /* Empty tree: the new node simply becomes the root. */
  if (p == NULL)
  {
    *root = node;
    node->avl.left = NULL;
    node->avl.right = NULL;
    node->avl.bal = 0;
    return 0;
  }

  /*
   * Descend to the insertion point, recording the path and the direction
   * taken at each node.
   */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
    {
      p->avl.cache = 1;
      q = p->avl.right;
      if (q == NULL)
      {
        /* NOTE(review): q is assigned node twice here; the second statement
         * also links the new node as the right child. Redundant but benign. */
        q = node;
        p->avl.right = q = node;
        break;
      }
    }
    else if ((p->dev != dev) || (p->block != block))
    {
      p->avl.cache = -1;
      q = p->avl.left;
      if (q == NULL)
      {
        q = node;
        p->avl.left = q;
        break;
      }
    }
    else
    {
      /* Duplicate key: a node with this dev/block is already in the tree. */
      return -1;
    }

    p = q;
  }

  /* Initialise the freshly linked leaf. */
  q->avl.left = q->avl.right = NULL;
  q->avl.bal = 0;
  modified = true;
  buf_prev--;

  /*
   * Retreat along the recorded path, updating balance factors and rotating
   * where a node becomes doubly unbalanced. 'modified' is cleared as soon as
   * the subtree height change stops propagating.
   */
  while (modified)
  {
    if (p->avl.cache == -1)
    {
      /* The new node went into p's left subtree. */
      switch (p->avl.bal)
      {
        case 1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = -1;
          break;

        case -1:
          p1 = p->avl.left;
          if (p1->avl.bal == -1) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;
            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;
            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    else
    {
      /* The new node went into p's right subtree (mirror of the above). */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = 1;
          break;

        case 1:
          p1 = p->avl.right;
          if (p1->avl.bal == 1) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;
            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;
            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    /* Relink the (possibly new) subtree root into the parent from the path,
     * or install it as the tree root when the stack is exhausted. */
    q = p;
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;

      if (p->avl.cache == -1)
      {
        p->avl.left = q;
      }
      else
      {
        p->avl.right = q;
      }
    }
    else
    {
      *root = p;
      break;
    }
  };

  return 0;
}
533
534
/**
 * Removes the node from the tree.
 *
 * Uses the same explicit path stack and avl.cache direction markers as
 * rtems_bdbuf_avl_insert. When the node to delete has two children it is
 * replaced by the leftmost node of its right subtree, then the path is
 * rewound and rebalanced.
 *
 * @param root Pointer to pointer to the root node
 * @param node Pointer to the node to remove
 * @retval 0 Item removed
 * @retval -1 No such item found
 */
static int
rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
                       const rtems_bdbuf_buffer* node)
{
  dev_t             dev = node->dev;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  r;
  rtems_bdbuf_buffer*  s;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  memset (buf_stack, 0, sizeof(buf_stack));

  /* Descend to the node, recording the path and direction markers. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
    {
      p->avl.cache = 1;
      p = p->avl.right;
    }
    else if ((p->dev != dev) || (p->block != block))
    {
      p->avl.cache = -1;
      p = p->avl.left;
    }
    else
    {
      /* node found */
      break;
    }
  }

  if (p == NULL)
  {
    /* there is no such node */
    return -1;
  }

  q = p;

  /* Pop the found node; p becomes its parent (or NULL for the root). */
  buf_prev--;
  if (buf_prev > buf_stack)
  {
    p = *(buf_prev - 1);
  }
  else
  {
    p = NULL;
  }

  /* at this moment q - is a node to delete, p is q's parent */
  if (q->avl.right == NULL)
  {
    /* No right child: splice q's left child (if any) into its place. */
    r = q->avl.left;
    if (r != NULL)
    {
      r->avl.bal = 0;
    }
    q = r;
  }
  else
  {
    rtems_bdbuf_buffer **t;

    r = q->avl.right;

    if (r->avl.left == NULL)
    {
      /* Right child has no left subtree: it replaces q directly. */
      r->avl.left = q->avl.left;
      r->avl.bal = q->avl.bal;
      r->avl.cache = 1;
      *buf_prev++ = q = r;
    }
    else
    {
      /* General case: find the in-order successor s (leftmost node of the
       * right subtree), splice it out and let it replace q. The slot t in
       * the path stack is reserved for the replacement. */
      t = buf_prev++;
      s = r;

      while (s->avl.left != NULL)
      {
        *buf_prev++ = r = s;
        s = r->avl.left;
        r->avl.cache = -1;
      }

      s->avl.left = q->avl.left;
      r->avl.left = s->avl.right;
      s->avl.right = q->avl.right;
      s->avl.bal = q->avl.bal;
      s->avl.cache = 1;

      *t = q = s;
    }
  }

  /* Link the replacement into q's old parent (or make it the root). */
  if (p != NULL)
  {
    if (p->avl.cache == -1)
    {
      p->avl.left = q;
    }
    else
    {
      p->avl.right = q;
    }
  }
  else
  {
    *root = q;
  }

  modified = true;

  /*
   * Retreat along the recorded path rebalancing shortened subtrees until the
   * height change stops propagating or the root is reached.
   */
  while (modified)
  {
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;
    }
    else
    {
      break;
    }

    if (p->avl.cache == -1)
    {
      /* rebalance left branch */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          break;
        case  0:
          p->avl.bal = 1;
          modified = false;
          break;

        case +1:
          p1 = p->avl.right;

          if (p1->avl.bal >= 0) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;

            if (p1->avl.bal == 0)
            {
              p1->avl.bal = -1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;

            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;

            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }
    else
    {
      /* rebalance right branch */
      switch (p->avl.bal)
      {
        case +1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = -1;
          modified = false;
          break;

        case -1:
          p1 = p->avl.left;

          if (p1->avl.bal <= 0) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            if (p1->avl.bal == 0)
            {
              p1->avl.bal = 1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;

            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;

            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }

    /* Relink the rebalanced subtree into its parent, or install it as the
     * new root when the path stack is exhausted. */
    if (buf_prev > buf_stack)
    {
      q = *(buf_prev - 1);

      if (q->avl.cache == -1)
      {
        q->avl.left = p;
      }
      else
      {
        q->avl.right = p;
      }
    }
    else
    {
      *root = p;
      break;
    }

  }

  return 0;
}
808
/**
 * Set the state of a buffer descriptor.
 *
 * @param bd The buffer descriptor to change.
 * @param state The new state.
 */
static void
rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
{
  bd->state = state;
}
814
815/**
816 * Change the block number for the block size to the block number for the media
817 * block size. We have to use 64bit maths. There is no short cut here.
818 *
819 * @param block The logical block number in the block size terms.
820 * @param block_size The block size.
821 * @param media_block_size The block size of the media.
822 * @return rtems_blkdev_bnum The media block number.
823 */
824static rtems_blkdev_bnum
825rtems_bdbuf_media_block (rtems_blkdev_bnum block,
826                         size_t            block_size,
827                         size_t            media_block_size)
828{
829  return (rtems_blkdev_bnum)
830    ((((uint64_t) block) * block_size) / media_block_size);
831}
832
833/**
834 * Lock the mutex. A single task can nest calls.
835 *
836 * @param lock The mutex to lock.
837 * @param fatal_error_code The error code if the call fails.
838 */
839static void
840rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
841{
842  rtems_status_code sc = rtems_semaphore_obtain (lock,
843                                                 RTEMS_WAIT,
844                                                 RTEMS_NO_TIMEOUT);
845  if (sc != RTEMS_SUCCESSFUL)
846    rtems_fatal_error_occurred (fatal_error_code);
847}
848
849/**
850 * Unlock the mutex.
851 *
852 * @param lock The mutex to unlock.
853 * @param fatal_error_code The error code if the call fails.
854 */
855static void
856rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
857{
858  rtems_status_code sc = rtems_semaphore_release (lock);
859  if (sc != RTEMS_SUCCESSFUL)
860    rtems_fatal_error_occurred (fatal_error_code);
861}
862
/**
 * Lock the cache. A single task can nest calls. Fatal error on failure.
 */
static void
rtems_bdbuf_lock_cache (void)
{
  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK);
}
871
/**
 * Unlock the cache. Fatal error on failure.
 */
static void
rtems_bdbuf_unlock_cache (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK);
}
880
/**
 * Lock the cache's sync. A single task can nest calls. Fatal error on failure.
 */
static void
rtems_bdbuf_lock_sync (void)
{
  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
}
889
/**
 * Unlock the cache's sync lock. Any blocked writers are woken.
 * Fatal error on failure.
 */
static void
rtems_bdbuf_unlock_sync (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
                      RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
}
899
/**
 * Take a reference on the buffer's group by incrementing its user count.
 *
 * @param bd The buffer whose group is referenced.
 */
static void
rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
{
  ++bd->group->users;
}
905
/**
 * Drop a reference on the buffer's group by decrementing its user count.
 *
 * @param bd The buffer whose group reference is released.
 */
static void
rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
{
  --bd->group->users;
}
911
912static rtems_mode
913rtems_bdbuf_disable_preemption (void)
914{
915  rtems_status_code sc = RTEMS_SUCCESSFUL;
916  rtems_mode prev_mode = 0;
917
918  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
919  if (sc != RTEMS_SUCCESSFUL)
920    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS);
921
922  return prev_mode;
923}
924
925static void
926rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
927{
928  rtems_status_code sc = RTEMS_SUCCESSFUL;
929
930  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
931  if (sc != RTEMS_SUCCESSFUL)
932    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST);
933}
934
/**
 * Wait until woken. Semaphores are used so a number of tasks can wait and can
 * be woken at once. Task events would require we maintain a list of tasks to
 * be woken and this would require storage and we do not know the number of
 * tasks that could be waiting.
 *
 * While we have the cache locked we can try and claim the semaphore and
 * therefore know when we release the lock to the cache we will block until the
 * semaphore is released. This may even happen before we get to block.
 *
 * A counter is used to save the release call when no one is waiting.
 *
 * The function assumes the cache is locked on entry and it will be locked on
 * exit.
 */
static void
rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
{
  rtems_status_code sc;
  rtems_mode        prev_mode;

  /*
   * Indicate we are waiting.
   */
  ++waiters->count;

  /*
   * Disable preemption then unlock the cache and block.  There is no POSIX
   * condition variable in the core API so this is a work around.
   *
   * The issue is a task could preempt after the cache is unlocked because it is
   * blocking or just hits that window, and before this task has blocked on the
   * semaphore. If the preempting task flushes the queue this task will not see
   * the flush and may block for ever or until another transaction flushes this
   * semaphore.
   */
  prev_mode = rtems_bdbuf_disable_preemption ();

  /*
   * Unlock the cache, wait, and lock the cache when we return.
   */
  rtems_bdbuf_unlock_cache ();

  sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);

  /* A timeout here means a waiter was stuck too long; treat it as fatal so
   * the stall is visible rather than silent. */
  if (sc == RTEMS_TIMEOUT)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO);

  /* The waiter semaphore is only ever flushed (see rtems_bdbuf_wake), which
   * returns RTEMS_UNSATISFIED to the blocked obtain. Any other status —
   * including a successful obtain — indicates a protocol violation. */
  if (sc != RTEMS_UNSATISFIED)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2);

  rtems_bdbuf_lock_cache ();

  rtems_bdbuf_restore_preemption (prev_mode);

  --waiters->count;
}
992
/**
 * Wait on the given waiter group for the buffer. The group reference and the
 * buffer waiter count are held across the anonymous wait; the wake paths
 * check bd->waiters (see the add_to_*_after_access functions), so the counts
 * must bracket the wait exactly.
 *
 * The cache must be locked on entry and is locked on exit.
 *
 * @param bd The buffer being waited on.
 * @param waiters The waiter group to block on.
 */
static void
rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
{
  rtems_bdbuf_group_obtain (bd);
  ++bd->waiters;
  rtems_bdbuf_anonymous_wait (waiters);
  --bd->waiters;
  rtems_bdbuf_group_release (bd);
}
1002
1003/**
1004 * Wake a blocked resource. The resource has a counter that lets us know if
1005 * there are any waiters.
1006 */
1007static void
1008rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters)
1009{
1010  rtems_status_code sc = RTEMS_SUCCESSFUL;
1011
1012  if (waiters->count > 0)
1013  {
1014    sc = rtems_semaphore_flush (waiters->sema);
1015    if (sc != RTEMS_SUCCESSFUL)
1016      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE);
1017  }
1018}
1019
1020static void
1021rtems_bdbuf_wake_swapper (void)
1022{
1023  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1024                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1025  if (sc != RTEMS_SUCCESSFUL)
1026    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
1027}
1028
1029static bool
1030rtems_bdbuf_has_buffer_waiters (void)
1031{
1032  return bdbuf_cache.buffer_waiters.count;
1033}
1034
/**
 * Move an accessed buffer onto the modified list. If a sync covering this
 * buffer's device is active, block on the sync lock first so the buffer does
 * not join the modified list behind an in-progress sync. Assumes the cache is
 * locked on entry and leaves it locked on exit.
 *
 * @param bd The buffer being released after access.
 */
static void
rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
{
  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dev)
  {
    /* The cache lock must be dropped before taking the sync lock to avoid
     * deadlock with the sync holder; it is re-acquired afterwards. */
    rtems_bdbuf_unlock_cache ();

    /*
     * Wait for the sync lock.
     */
    rtems_bdbuf_lock_sync ();

    rtems_bdbuf_unlock_sync ();
    rtems_bdbuf_lock_cache ();
  }

  /*
   * Only the first modified release sets the timer and any further user
   * accesses do not change the timer value which should move down. This
   * assumes the user's hold of the buffer is much less than the time on the
   * modified list. Resetting the timer on each access which could result in a
   * buffer never getting to 0 and never being forced onto disk. This raises a
   * difficult question. Is a snapshot of a block that is changing better than
   * nothing being written? We have tended to think we should hold changes for
   * only a specific period of time even if still changing and get onto disk
   * and letting the file system try and recover this position if it can.
   */
  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
    bd->hold_timer = bdbuf_config.swap_block_hold;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
  rtems_chain_append (&bdbuf_cache.modified, &bd->link);

  /* Wake tasks waiting on this buffer; otherwise, if tasks are starved of
   * buffers, kick the swapout task to free some up. */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else if (rtems_bdbuf_has_buffer_waiters ())
    rtems_bdbuf_wake_swapper ();
}
1074
1075static void
1076rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
1077{
1078  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
1079  rtems_chain_append (&bdbuf_cache.lru, &bd->link);
1080  rtems_bdbuf_group_release (bd);
1081
1082  if (bd->waiters)
1083    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1084  else
1085    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1086}
1087
1088static void
1089rtems_bdbuf_add_to_sync_list_after_access (rtems_bdbuf_buffer *bd)
1090{
1091  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
1092
1093  rtems_chain_append (&bdbuf_cache.sync, &bd->link);
1094
1095  if (bd->waiters)
1096    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1097}
1098
1099/**
1100 * Compute the number of BDs per group for a given buffer size.
1101 *
1102 * @param size The buffer size. It can be any size and we scale up.
1103 */
1104static size_t
1105rtems_bdbuf_bds_per_group (size_t size)
1106{
1107  size_t bufs_per_size;
1108  size_t bds_per_size;
1109
1110  if (size > bdbuf_config.buffer_max)
1111    return 0;
1112
1113  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1114
1115  for (bds_per_size = 1;
1116       bds_per_size < bufs_per_size;
1117       bds_per_size <<= 1)
1118    ;
1119
1120  return bdbuf_cache.max_bds_per_group / bds_per_size;
1121}
1122
1123static void
1124rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
1125{
1126  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1127    rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM);
1128}
1129
1130static void
1131rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
1132{
1133  switch (bd->state)
1134  {
1135    case RTEMS_BDBUF_STATE_FREE:
1136      break;
1137    case RTEMS_BDBUF_STATE_CACHED:
1138      rtems_bdbuf_remove_from_tree (bd);
1139      break;
1140    default:
1141      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_10);
1142  }
1143
1144  rtems_chain_extract (&bd->link);
1145}
1146
1147static void
1148rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
1149{
1150  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
1151  rtems_chain_prepend (&bdbuf_cache.lru, &bd->link);
1152}
1153
1154static void
1155rtems_bdbuf_make_empty_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
1156{
1157  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
1158  rtems_chain_append (&bdbuf_cache.lru, &bd->link);
1159}
1160
1161static void
1162rtems_bdbuf_release_empty_buffer (rtems_bdbuf_buffer *bd)
1163{
1164  rtems_bdbuf_group_release (bd);
1165
1166  if (bd->waiters)
1167  {
1168    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
1169    rtems_chain_append (&bdbuf_cache.lru, &bd->link);
1170    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1171  }
1172  else
1173  {
1174    rtems_bdbuf_remove_from_tree (bd);
1175    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1176    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1177  }
1178}
1179
1180/**
1181 * Reallocate a group. The BDs currently allocated in the group are removed
1182 * from the ALV tree and any lists then the new BD's are prepended to the ready
1183 * list of the cache.
1184 *
1185 * @param group The group to reallocate.
1186 * @param new_bds_per_group The new count of BDs per group.
1187 * @return A buffer of this group.
1188 */
1189static rtems_bdbuf_buffer *
1190rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1191{
1192  rtems_bdbuf_buffer* bd;
1193  size_t              b;
1194  size_t              bufs_per_bd;
1195
1196  if (rtems_bdbuf_tracer)
1197    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1198            group - bdbuf_cache.groups, group->bds_per_group,
1199            new_bds_per_group);
1200
1201  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1202
1203  for (b = 0, bd = group->bdbuf;
1204       b < group->bds_per_group;
1205       b++, bd += bufs_per_bd)
1206    rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1207
1208  group->bds_per_group = new_bds_per_group;
1209  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1210
1211  for (b = 1, bd = group->bdbuf + bufs_per_bd;
1212       b < group->bds_per_group;
1213       b++, bd += bufs_per_bd)
1214    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1215
1216  if (b > 1)
1217    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1218
1219  return group->bdbuf;
1220}
1221
1222static void
1223rtems_bdbuf_recycle_buffer (rtems_bdbuf_buffer *bd,
1224                            dev_t               dev,
1225                            rtems_blkdev_bnum   block)
1226{
1227  bd->dev       = dev;
1228  bd->block     = block;
1229  bd->avl.left  = NULL;
1230  bd->avl.right = NULL;
1231  bd->waiters   = 0;
1232
1233  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1234    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE);
1235
1236  rtems_bdbuf_make_empty_and_add_to_lru_list (bd);
1237}
1238
1239static rtems_bdbuf_buffer *
1240rtems_bdbuf_get_buffer_from_lru_list (dev_t             dev,
1241                                      rtems_blkdev_bnum block,
1242                                      size_t            bds_per_group)
1243{
1244  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);
1245
1246  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
1247  {
1248    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
1249    rtems_bdbuf_buffer *recycle_bd = NULL;
1250
1251    if (rtems_bdbuf_tracer)
1252      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
1253              bd - bdbuf_cache.bds,
1254              bd->group - bdbuf_cache.groups, bd->group->users,
1255              bd->group->bds_per_group, bds_per_group);
1256
1257    /*
1258     * If nobody waits for this BD, we may recycle it.
1259     */
1260    if (bd->waiters == 0)
1261    {
1262      if (bd->group->bds_per_group == bds_per_group)
1263      {
1264        rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1265
1266        recycle_bd = bd;
1267      }
1268      else if (bd->group->users == 0)
1269        recycle_bd = rtems_bdbuf_group_realloc (bd->group, bds_per_group);
1270    }
1271
1272    if (recycle_bd != NULL)
1273    {
1274      rtems_bdbuf_recycle_buffer (recycle_bd, dev, block);
1275
1276      return recycle_bd;
1277    }
1278
1279    node = rtems_chain_next (node);
1280  }
1281
1282  return NULL;
1283}
1284
1285/**
1286 * Initialise the cache.
1287 *
1288 * @return rtems_status_code The initialisation status.
1289 */
1290rtems_status_code
1291rtems_bdbuf_init (void)
1292{
1293  rtems_bdbuf_group*  group;
1294  rtems_bdbuf_buffer* bd;
1295  uint8_t*            buffer;
1296  size_t              b;
1297  size_t              cache_aligment;
1298  rtems_status_code   sc;
1299  rtems_mode          prev_mode;
1300
1301  if (rtems_bdbuf_tracer)
1302    printf ("bdbuf:init\n");
1303
1304  if (rtems_interrupt_is_in_progress())
1305    return RTEMS_CALLED_FROM_ISR;
1306
1307  /*
1308   * Check the configuration table values.
1309   */
1310  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
1311    return RTEMS_INVALID_NUMBER;
1312
1313  /*
1314   * We use a special variable to manage the initialisation incase we have
1315   * completing threads doing this. You may get errors if the another thread
1316   * makes a call and we have not finished initialisation.
1317   */
1318  prev_mode = rtems_bdbuf_disable_preemption ();
1319  if (bdbuf_cache.initialised)
1320  {
1321    rtems_bdbuf_restore_preemption (prev_mode);
1322
1323    return RTEMS_RESOURCE_IN_USE;
1324  }
1325  memset(&bdbuf_cache, 0, sizeof(bdbuf_cache));
1326  bdbuf_cache.initialised = true;
1327  rtems_bdbuf_restore_preemption (prev_mode);
1328
1329  /*
1330   * For unspecified cache alignments we use the CPU alignment.
1331   */
1332  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
1333  if (cache_aligment <= 0)
1334    cache_aligment = CPU_ALIGNMENT;
1335
1336  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;
1337
1338  rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
1339  rtems_chain_initialize_empty (&bdbuf_cache.lru);
1340  rtems_chain_initialize_empty (&bdbuf_cache.modified);
1341  rtems_chain_initialize_empty (&bdbuf_cache.sync);
1342
1343  /*
1344   * Create the locks for the cache.
1345   */
1346  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
1347                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1348                               &bdbuf_cache.lock);
1349  if (sc != RTEMS_SUCCESSFUL)
1350    goto error;
1351
1352  rtems_bdbuf_lock_cache ();
1353
1354  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
1355                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1356                               &bdbuf_cache.sync_lock);
1357  if (sc != RTEMS_SUCCESSFUL)
1358    goto error;
1359
1360  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
1361                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1362                               &bdbuf_cache.access_waiters.sema);
1363  if (sc != RTEMS_SUCCESSFUL)
1364    goto error;
1365
1366  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
1367                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1368                               &bdbuf_cache.transfer_waiters.sema);
1369  if (sc != RTEMS_SUCCESSFUL)
1370    goto error;
1371
1372  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'b'),
1373                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1374                               &bdbuf_cache.buffer_waiters.sema);
1375  if (sc != RTEMS_SUCCESSFUL)
1376    goto error;
1377
1378  /*
1379   * Compute the various number of elements in the cache.
1380   */
1381  bdbuf_cache.buffer_min_count =
1382    bdbuf_config.size / bdbuf_config.buffer_min;
1383  bdbuf_cache.max_bds_per_group =
1384    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
1385  bdbuf_cache.group_count =
1386    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;
1387
1388  /*
1389   * Allocate the memory for the buffer descriptors.
1390   */
1391  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
1392                            bdbuf_cache.buffer_min_count);
1393  if (!bdbuf_cache.bds)
1394    goto error;
1395
1396  /*
1397   * Allocate the memory for the buffer descriptors.
1398   */
1399  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
1400                               bdbuf_cache.group_count);
1401  if (!bdbuf_cache.groups)
1402    goto error;
1403
1404  /*
1405   * Allocate memory for buffer memory. The buffer memory will be cache
1406   * aligned. It is possible to free the memory allocated by rtems_memalign()
1407   * with free(). Return 0 if allocated.
1408   *
1409   * The memory allocate allows a
1410   */
1411  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
1412                      cache_aligment,
1413                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
1414    goto error;
1415
1416  /*
1417   * The cache is empty after opening so we need to add all the buffers to it
1418   * and initialise the groups.
1419   */
1420  for (b = 0, group = bdbuf_cache.groups,
1421         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
1422       b < bdbuf_cache.buffer_min_count;
1423       b++, bd++, buffer += bdbuf_config.buffer_min)
1424  {
1425    bd->dev    = BDBUF_INVALID_DEV;
1426    bd->group  = group;
1427    bd->buffer = buffer;
1428
1429    rtems_chain_append (&bdbuf_cache.lru, &bd->link);
1430
1431    if ((b % bdbuf_cache.max_bds_per_group) ==
1432        (bdbuf_cache.max_bds_per_group - 1))
1433      group++;
1434  }
1435
1436  for (b = 0,
1437         group = bdbuf_cache.groups,
1438         bd = bdbuf_cache.bds;
1439       b < bdbuf_cache.group_count;
1440       b++,
1441         group++,
1442         bd += bdbuf_cache.max_bds_per_group)
1443  {
1444    group->bds_per_group = bdbuf_cache.max_bds_per_group;
1445    group->bdbuf = bd;
1446  }
1447
1448  /*
1449   * Create and start swapout task. This task will create and manage the worker
1450   * threads.
1451   */
1452  bdbuf_cache.swapout_enabled = true;
1453
1454  sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'),
1455                          bdbuf_config.swapout_priority ?
1456                            bdbuf_config.swapout_priority :
1457                            RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
1458                          SWAPOUT_TASK_STACK_SIZE,
1459                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1460                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1461                          &bdbuf_cache.swapout);
1462  if (sc != RTEMS_SUCCESSFUL)
1463    goto error;
1464
1465  sc = rtems_task_start (bdbuf_cache.swapout,
1466                         rtems_bdbuf_swapout_task,
1467                         (rtems_task_argument) &bdbuf_cache);
1468  if (sc != RTEMS_SUCCESSFUL)
1469    goto error;
1470
1471  rtems_bdbuf_unlock_cache ();
1472
1473  return RTEMS_SUCCESSFUL;
1474
1475error:
1476
1477  if (bdbuf_cache.swapout != 0)
1478    rtems_task_delete (bdbuf_cache.swapout);
1479
1480  free (bdbuf_cache.buffers);
1481  free (bdbuf_cache.groups);
1482  free (bdbuf_cache.bds);
1483
1484  rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema);
1485  rtems_semaphore_delete (bdbuf_cache.access_waiters.sema);
1486  rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema);
1487  rtems_semaphore_delete (bdbuf_cache.sync_lock);
1488
1489  if (bdbuf_cache.lock != 0)
1490  {
1491    rtems_bdbuf_unlock_cache ();
1492    rtems_semaphore_delete (bdbuf_cache.lock);
1493  }
1494
1495  bdbuf_cache.initialised = false;
1496
1497  return RTEMS_UNSATISFIED;
1498}
1499
1500static void
1501rtems_bdbuf_wait_for_event (rtems_event_set event)
1502{
1503  rtems_status_code sc = RTEMS_SUCCESSFUL;
1504  rtems_event_set   out = 0;
1505
1506  sc = rtems_event_receive (event,
1507                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1508                            RTEMS_NO_TIMEOUT,
1509                            &out);
1510
1511  if (sc != RTEMS_SUCCESSFUL || out != event)
1512    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT);
1513}
1514
/*
 * Wait until the buffer may be accessed exclusively, then detach it from its
 * current chain. Loops because a wakeup does not guarantee the state has
 * settled.
 */
static void
rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* The modified list holds a group reference; drop it on take-over. */
        rtems_bdbuf_group_release (bd);
        /* Fall through */
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
        /* Buffer is idle; remove it from its list and hand it over. */
        rtems_chain_extract (&bd->link);
        return;
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
        /* Another task holds the buffer; wait for it to be released. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_SYNC:
        /* Device I/O is in progress; wait for the transfer to finish. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_7);
    }
  }
}
1543
1544static void
1545rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
1546{
1547  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
1548  rtems_chain_extract (&bd->link);
1549  rtems_chain_append (&bdbuf_cache.sync, &bd->link);
1550  rtems_bdbuf_wake_swapper ();
1551}
1552
1553/**
1554 * @brief Waits until the buffer is ready for recycling.
1555 *
1556 * @retval @c true Buffer is valid and may be recycled.
1557 * @retval @c false Buffer is invalid and has to searched again.
1558 */
1559static bool
1560rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
1561{
1562  while (true)
1563  {
1564    switch (bd->state)
1565    {
1566      case RTEMS_BDBUF_STATE_FREE:
1567        return true;
1568      case RTEMS_BDBUF_STATE_MODIFIED:
1569        rtems_bdbuf_request_sync_for_modified_buffer (bd);
1570        break;
1571      case RTEMS_BDBUF_STATE_CACHED:
1572      case RTEMS_BDBUF_STATE_EMPTY:
1573        if (bd->waiters == 0)
1574          return true;
1575        else
1576        {
1577          /*
1578           * It is essential that we wait here without a special wait count and
1579           * without the group in use.  Otherwise we could trigger a wait ping
1580           * pong with another recycle waiter.  The state of the buffer is
1581           * arbitrary afterwards.
1582           */
1583          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1584          return false;
1585        }
1586      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1587      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1588      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1589        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1590        break;
1591      case RTEMS_BDBUF_STATE_TRANSFER:
1592      case RTEMS_BDBUF_STATE_SYNC:
1593        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1594        break;
1595      default:
1596        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_8);
1597    }
1598  }
1599}
1600
/*
 * Wait until any sync or transfer in progress on the buffer has completed.
 * Returns once the buffer is in a settled (non-I/O) state.
 */
static void
rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
        /* No I/O pending on the buffer. */
        return;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_9);
    }
  }
}
1624
1625static void
1626rtems_bdbuf_wait_for_buffer (void)
1627{
1628  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
1629    rtems_bdbuf_wake_swapper ();
1630
1631  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1632}
1633
1634static rtems_bdbuf_buffer *
1635rtems_bdbuf_get_buffer_for_read_ahead (dev_t             dev,
1636                                       rtems_blkdev_bnum block,
1637                                       size_t            bds_per_group)
1638{
1639  rtems_bdbuf_buffer *bd = NULL;
1640
1641  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dev, block);
1642
1643  if (bd == NULL)
1644  {
1645    bd = rtems_bdbuf_get_buffer_from_lru_list (dev, block, bds_per_group);
1646
1647    if (bd != NULL)
1648      rtems_bdbuf_group_obtain (bd);
1649  }
1650  else
1651    /*
1652     * The buffer is in the cache.  So it is already available or in use, and
1653     * thus no need for a read ahead.
1654     */
1655    bd = NULL;
1656
1657  return bd;
1658}
1659
/*
 * Obtain a buffer for the given device and block with exclusive access.
 * Loops until a suitable buffer is found: an existing cache entry with the
 * right group granularity, or a buffer recycled from the LRU list. The
 * cache lock must be held; it may be released and reacquired while waiting.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_for_access (dev_t             dev,
                                   rtems_blkdev_bnum block,
                                   size_t            bds_per_group)
{
  rtems_bdbuf_buffer *bd = NULL;

  do
  {
    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dev, block);

    if (bd != NULL)
    {
      if (bd->group->bds_per_group != bds_per_group)
      {
        /*
         * The cached buffer's group has the wrong granularity for this
         * device's block size. Free it once it is recyclable and retry.
         */
        if (rtems_bdbuf_wait_for_recycle (bd))
        {
          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
        }
        bd = NULL;
      }
    }
    else
    {
      bd = rtems_bdbuf_get_buffer_from_lru_list (dev, block, bds_per_group);

      if (bd == NULL)
        rtems_bdbuf_wait_for_buffer ();
    }
  }
  while (bd == NULL);

  rtems_bdbuf_wait_for_access (bd);
  rtems_bdbuf_group_obtain (bd);

  return bd;
}
1699
/*
 * Obtain the disk device for a dev_t and optionally translate the caller's
 * block number into a media block number (including the device start offset)
 * and compute the BDs per group for the device's block size. On success the
 * caller owns a disk reference and must release it with
 * rtems_bdbuf_release_disk().
 */
static rtems_status_code
rtems_bdbuf_obtain_disk (dev_t               dev,
                         rtems_blkdev_bnum   block,
                         rtems_disk_device **dd_ptr,
                         rtems_blkdev_bnum  *media_block_ptr,
                         size_t             *bds_per_group_ptr)
{
  rtems_disk_device *dd = NULL;

  if (!bdbuf_cache.initialised)
    return RTEMS_NOT_CONFIGURED;

  /*
   * Do not hold the cache lock when obtaining the disk table.
   */
  dd = rtems_disk_obtain (dev);
  if (dd == NULL)
    return RTEMS_INVALID_ID;

  *dd_ptr = dd;

  if (media_block_ptr != NULL)
  {
    /*
     * Compute the media block number. Drivers work with media block number not
     * the block number a BD may have as this depends on the block size set by
     * the user.
     */
    rtems_blkdev_bnum mb = rtems_bdbuf_media_block (block,
                                                    dd->block_size,
                                                    dd->media_block_size);
    if (mb >= dd->size)
    {
      rtems_disk_release(dd);
      return RTEMS_INVALID_NUMBER;
    }

    /* The stored media block includes the device start offset. */
    *media_block_ptr = mb + dd->start;
  }

  if (bds_per_group_ptr != NULL)
  {
    size_t bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);

    /* A zero result means the block size exceeds the configured maximum. */
    if (bds_per_group == 0)
    {
      rtems_disk_release (dd);
      return RTEMS_INVALID_NUMBER;
    }

    *bds_per_group_ptr = bds_per_group;
  }

  return RTEMS_SUCCESSFUL;
}
1755
1756static void
1757rtems_bdbuf_release_disk (rtems_disk_device *dd)
1758{
1759  rtems_status_code sc = RTEMS_SUCCESSFUL;
1760
1761  sc = rtems_disk_release (dd);
1762  if (sc != RTEMS_SUCCESSFUL)
1763    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_DISK_REL);
1764}
1765
/*
 * Get a buffer for the block without reading its contents from disk. The
 * returned buffer is held in an access state until released via
 * rtems_bdbuf_release(), rtems_bdbuf_release_modified() or
 * rtems_bdbuf_sync().
 */
rtems_status_code
rtems_bdbuf_get (dev_t                dev,
                 rtems_blkdev_bnum    block,
                 rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code   sc = RTEMS_SUCCESSFUL;
  rtems_disk_device  *dd = NULL;
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block = 0;
  size_t              bds_per_group = 0;

  sc = rtems_bdbuf_obtain_disk (dev, block, &dd, &media_block, &bds_per_group);
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  rtems_bdbuf_lock_cache ();

  /*
   * Print the block index relative to the physical disk.
   */
  if (rtems_bdbuf_tracer)
    printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
            media_block, block, (unsigned) dev);

  bd = rtems_bdbuf_get_buffer_for_access (dev, media_block, bds_per_group);

  /* Move the buffer into the access state matching its current state. */
  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_CACHED:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
      break;
    case RTEMS_BDBUF_STATE_EMPTY:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
      break;
    case RTEMS_BDBUF_STATE_MODIFIED:
      /*
       * To get a modified buffer could be considered a bug in the caller
       * because you should not be getting an already modified buffer but user
       * may have modified a byte in a block then decided to seek the start and
       * write the whole block and the file system will have no record of this
       * so just gets the block to fill.
       */
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_2);
      break;
  }

  if (rtems_bdbuf_tracer)
  {
    rtems_bdbuf_show_users ("get", bd);
    rtems_bdbuf_show_usage ();
  }

  rtems_bdbuf_unlock_cache ();

  rtems_bdbuf_release_disk (dd);

  *bd_ptr = bd;

  return RTEMS_SUCCESSFUL;
}
1829
1830/**
1831 * Call back handler called by the low level driver when the transfer has
1832 * completed. This function may be invoked from interrupt handler.
1833 *
1834 * @param arg Arbitrary argument specified in block device request
1835 *            structure (in this case - pointer to the appropriate
1836 *            block device request structure).
1837 * @param status I/O completion status
1838 */
1839static void
1840rtems_bdbuf_transfer_done (void* arg, rtems_status_code status)
1841{
1842  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
1843
1844  req->status = status;
1845
1846  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
1847}
1848
/*
 * Build a read request for the given media block plus as many read-ahead
 * blocks as can be obtained without waiting. The first buffer is always
 * obtained (possibly waiting) and returned via bd_ptr; read-ahead stops at
 * the first block already cached or when no buffer can be recycled.
 */
static void
rtems_bdbuf_create_read_request (const rtems_disk_device *dd,
                                 rtems_blkdev_bnum        media_block,
                                 size_t                   bds_per_group,
                                 rtems_blkdev_request    *req,
                                 rtems_bdbuf_buffer     **bd_ptr)
{
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block_end = dd->start + dd->size;
  rtems_blkdev_bnum   media_block_count = dd->block_size / dd->media_block_size;
  dev_t               dev = dd->dev;
  uint32_t            block_size = dd->block_size;
  uint32_t            transfer_index = 1;
  uint32_t            transfer_count = bdbuf_config.max_read_ahead_blocks + 1;

  /* Clamp the read ahead to the end of the device. */
  if (media_block_end - media_block < transfer_count)
    transfer_count = media_block_end - media_block;

  req->req = RTEMS_BLKDEV_REQ_READ;
  req->req_done = rtems_bdbuf_transfer_done;
  req->done_arg = req;
  req->io_task = rtems_task_self ();
  req->status = RTEMS_RESOURCE_IN_USE;
  req->bufnum = 0;

  bd = rtems_bdbuf_get_buffer_for_access (dev, media_block, bds_per_group);

  *bd_ptr = bd;

  req->bufs [0].user   = bd;
  req->bufs [0].block  = media_block;
  req->bufs [0].length = block_size;
  req->bufs [0].buffer = bd->buffer;

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_users ("read", bd);

  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_CACHED:
    case RTEMS_BDBUF_STATE_MODIFIED:
      /* Data already valid; nothing to transfer (bufnum stays 0). */
      return;
    case RTEMS_BDBUF_STATE_EMPTY:
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_1);
      break;
  }

  /* Append read-ahead buffers; stop at the first block we cannot obtain. */
  while (transfer_index < transfer_count)
  {
    media_block += media_block_count;

    bd = rtems_bdbuf_get_buffer_for_read_ahead (dev, media_block,
                                                bds_per_group);

    if (bd == NULL)
      break;

    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

    req->bufs [transfer_index].user   = bd;
    req->bufs [transfer_index].block  = media_block;
    req->bufs [transfer_index].length = block_size;
    req->bufs [transfer_index].buffer = bd->buffer;

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("read-ahead", bd);

    ++transfer_index;
  }

  req->bufnum = transfer_index;
}
1924
/*
 * Hand a prepared request to the driver, wait for completion and settle the
 * state of every buffer involved. The cache lock is released while the
 * driver runs and while waiting; on return the lock state matches the
 * cache_locked argument.
 */
static rtems_status_code
rtems_bdbuf_execute_transfer_request (const rtems_disk_device *dd,
                                      rtems_blkdev_request    *req,
                                      bool                     cache_locked)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  int result = 0;
  uint32_t transfer_index = 0;
  bool wake_transfer = false;
  bool wake_buffer = false;

  /* The driver must not be called with the cache lock held. */
  if (cache_locked)
    rtems_bdbuf_unlock_cache ();

  result = dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);

  if (result == 0)
  {
    /* Block until rtems_bdbuf_transfer_done() signals completion. */
    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
    sc = req->status;
  }
  else
    sc = RTEMS_IO_ERROR;

  rtems_bdbuf_lock_cache ();

  /* Settle every buffer of the request according to the transfer outcome. */
  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
  {
    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
    bool waiters = bd->waiters > 0;

    if (waiters)
      wake_transfer = true;
    else
      wake_buffer = true;

    rtems_bdbuf_group_release (bd);

    if (sc == RTEMS_SUCCESSFUL)
    {
      /* Transfer succeeded; buffer now holds valid data. */
      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
      rtems_chain_append (&bdbuf_cache.lru, &bd->link);
    }
    else if (waiters)
    {
      /* Failed, but tasks wait on it; keep the descriptor as empty. */
      rtems_bdbuf_make_empty_and_add_to_lru_list (bd);
    }
    else
    {
      /* Failed and unused; discard the buffer for reuse. */
      rtems_bdbuf_remove_from_tree (bd);
      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
    }

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("transfer", bd);
  }

  if (wake_transfer)
    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);

  if (wake_buffer)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  if (!cache_locked)
    rtems_bdbuf_unlock_cache ();

  /* Collapse driver-specific errors into RTEMS_IO_ERROR. */
  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
    return sc;
  else
    return RTEMS_IO_ERROR;
}
1996
1997rtems_status_code
1998rtems_bdbuf_read (dev_t                dev,
1999                  rtems_blkdev_bnum    block,
2000                  rtems_bdbuf_buffer **bd_ptr)
2001{
2002  rtems_status_code     sc = RTEMS_SUCCESSFUL;
2003  rtems_disk_device    *dd = NULL;
2004  rtems_blkdev_request *req = NULL;
2005  rtems_bdbuf_buffer   *bd = NULL;
2006  rtems_blkdev_bnum     media_block = 0;
2007  size_t                bds_per_group = 0;
2008
2009  sc = rtems_bdbuf_obtain_disk (dev, block, &dd, &media_block, &bds_per_group);
2010  if (sc != RTEMS_SUCCESSFUL)
2011    return sc;
2012
2013  /*
2014   * TODO: This type of request structure is wrong and should be removed.
2015   */
2016#define bdbuf_alloc(size) __builtin_alloca (size)
2017
2018  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
2019                     sizeof ( rtems_blkdev_sg_buffer) *
2020                      (bdbuf_config.max_read_ahead_blocks + 1));
2021
2022  if (rtems_bdbuf_tracer)
2023    printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
2024            media_block + dd->start, block, (unsigned) dev);
2025
2026  rtems_bdbuf_lock_cache ();
2027  rtems_bdbuf_create_read_request (dd, media_block, bds_per_group, req, &bd);
2028
2029  if (req->bufnum > 0)
2030  {
2031    sc = rtems_bdbuf_execute_transfer_request (dd, req, true);
2032    if (sc == RTEMS_SUCCESSFUL)
2033    {
2034      rtems_chain_extract (&bd->link);
2035      rtems_bdbuf_group_obtain (bd);
2036    }
2037  }
2038
2039  if (sc == RTEMS_SUCCESSFUL)
2040  {
2041    switch (bd->state)
2042    {
2043      case RTEMS_BDBUF_STATE_CACHED:
2044        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
2045        break;
2046      case RTEMS_BDBUF_STATE_MODIFIED:
2047        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
2048        break;
2049      default:
2050        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_4);
2051        break;
2052    }
2053
2054    if (rtems_bdbuf_tracer)
2055    {
2056      rtems_bdbuf_show_users ("read", bd);
2057      rtems_bdbuf_show_usage ();
2058    }
2059
2060    *bd_ptr = bd;
2061  }
2062  else
2063    *bd_ptr = NULL;
2064
2065  rtems_bdbuf_unlock_cache ();
2066  rtems_bdbuf_release_disk (dd);
2067
2068  return sc;
2069}
2070
2071static rtems_status_code
2072rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
2073{
2074  if (!bdbuf_cache.initialised)
2075    return RTEMS_NOT_CONFIGURED;
2076  if (bd == NULL)
2077    return RTEMS_INVALID_ADDRESS;
2078  if (rtems_bdbuf_tracer)
2079  {
2080    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
2081    rtems_bdbuf_show_users (kind, bd);
2082  }
2083  rtems_bdbuf_lock_cache();
2084
2085  return RTEMS_SUCCESSFUL;
2086}
2087
2088rtems_status_code
2089rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2090{
2091  rtems_status_code sc = RTEMS_SUCCESSFUL;
2092
2093  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2094  if (sc != RTEMS_SUCCESSFUL)
2095    return sc;
2096
2097  switch (bd->state)
2098  {
2099    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2100      rtems_bdbuf_add_to_lru_list_after_access (bd);
2101      break;
2102    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2103      rtems_bdbuf_release_empty_buffer (bd);
2104      break;
2105    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2106      rtems_bdbuf_add_to_modified_list_after_access (bd);
2107      break;
2108    default:
2109      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_0);
2110      break;
2111  }
2112
2113  if (rtems_bdbuf_tracer)
2114    rtems_bdbuf_show_usage ();
2115
2116  rtems_bdbuf_unlock_cache ();
2117
2118  return RTEMS_SUCCESSFUL;
2119}
2120
2121rtems_status_code
2122rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2123{
2124  rtems_status_code sc = RTEMS_SUCCESSFUL;
2125
2126  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2127  if (sc != RTEMS_SUCCESSFUL)
2128    return sc;
2129
2130  switch (bd->state)
2131  {
2132    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2133    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2134    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2135      rtems_bdbuf_add_to_modified_list_after_access (bd);
2136      break;
2137    default:
2138      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_6);
2139      break;
2140  }
2141
2142  if (rtems_bdbuf_tracer)
2143    rtems_bdbuf_show_usage ();
2144
2145  rtems_bdbuf_unlock_cache ();
2146
2147  return RTEMS_SUCCESSFUL;
2148}
2149
/**
 * Write a single buffer to disk and wait for the transfer to complete. The
 * buffer is placed on the sync list, the swapout task is woken, and this
 * call blocks until the swapout task signals the sync is done.
 *
 * @param bd The buffer to synchronize with the medium.
 *
 * @retval RTEMS_SUCCESSFUL The buffer was written.
 * @retval RTEMS_NOT_CONFIGURED The cache has not been initialised.
 * @retval RTEMS_INVALID_ADDRESS The buffer pointer is NULL.
 */
rtems_status_code
rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;

  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  /* Only a buffer in one of the access states may be handed back. */
  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      rtems_bdbuf_add_to_sync_list_after_access (bd);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_5);
      break;
  }

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_usage ();

  /* Kick the swapout task and block until it has written this buffer. */
  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_wait_for_sync_done (bd);

  /*
   * If no one intercepts the sync, we created a cached buffer which may be
   * recycled.
   */
  if (bd->waiters == 0
        && (bd->state == RTEMS_BDBUF_STATE_CACHED
          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
  {
    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
    {
      /* An empty buffer maps no block any more; return it to the free pool. */
      rtems_bdbuf_remove_from_tree (bd);
      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
    }
    /* A buffer became available for reuse; wake anyone waiting for one. */
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
  }

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;
}
2197
/**
 * Write all modified buffers of a device to disk and wait until the swapout
 * task reports completion.
 *
 * @param dev The device to synchronize.
 *
 * @retval RTEMS_SUCCESSFUL All modified buffers for the device were written.
 * @return Any error returned by rtems_bdbuf_obtain_disk.
 */
rtems_status_code
rtems_bdbuf_syncdev (dev_t dev)
{
  rtems_status_code  sc = RTEMS_SUCCESSFUL;
  rtems_disk_device *dd = NULL;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:syncdev: %08x\n", (unsigned) dev);

  sc = rtems_bdbuf_obtain_disk (dev, 0, &dd, NULL, NULL);
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  /*
   * Take the sync lock before locking the cache. Once we have the sync lock we
   * can lock the cache. If another thread has the sync lock it will cause this
   * thread to block until it owns the sync lock then it can own the cache. The
   * sync lock can only be obtained with the cache unlocked.
   */
  rtems_bdbuf_lock_sync ();
  rtems_bdbuf_lock_cache ();

  /*
   * Set the cache to have a sync active for a specific device and let the swap
   * out task know the id of the requester to wake when done.
   *
   * The swap out task will negate the sync active flag when no more buffers
   * for the device are held on the "modified for sync" queues.
   */
  bdbuf_cache.sync_active    = true;
  bdbuf_cache.sync_requester = rtems_task_self ();
  bdbuf_cache.sync_device    = dev;

  /* Wake the swapout task, then block until it sends the sync-done event. */
  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_unlock_cache ();
  rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
  rtems_bdbuf_unlock_sync ();
  rtems_bdbuf_release_disk (dd);

  return RTEMS_SUCCESSFUL;
}
2239
2240static int
2241rtems_bdbuf_null_disk_ioctl (rtems_disk_device *dd, uint32_t req, void *arg)
2242{
2243  return -1;
2244}
2245
2246/**
2247 * Swapout transfer to the driver. The driver will break this I/O into groups
2248 * of consecutive write requests is multiple consecutive buffers are required
2249 * by the driver.
2250 *
2251 * @param transfer The transfer transaction.
2252 */
2253static void
2254rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2255{
2256  static rtems_disk_device null_disk = {
2257    .capabilities = 0,
2258    .ioctl = rtems_bdbuf_null_disk_ioctl
2259  };
2260
2261  if (rtems_bdbuf_tracer)
2262    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dev);
2263
2264  /*
2265   * If there are buffers to transfer to the media transfer them.
2266   */
2267  if (!rtems_chain_is_empty (&transfer->bds))
2268  {
2269    /*
2270     * The last block number used when the driver only supports
2271     * continuous blocks in a single request.
2272     */
2273    uint32_t last_block = 0;
2274
2275    /*
2276     * Number of buffers per bd. This is used to detect the next
2277     * block.
2278     */
2279    uint32_t bufs_per_bd = 0;
2280
2281    /*
2282     * Obtain the disk device. The cache's mutex has been released to avoid a
2283     * dead lock.
2284     */
2285    rtems_disk_device *dd = rtems_disk_obtain (transfer->dev);
2286
2287    if (dd == NULL)
2288      dd = &null_disk;
2289
2290    bufs_per_bd = dd->block_size / bdbuf_config.buffer_min;
2291
2292    /*
2293     * Take as many buffers as configured and pass to the driver. Note, the
2294     * API to the drivers has an array of buffers and if a chain was passed
2295     * we could have just passed the list. If the driver API is updated it
2296     * should be possible to make this change with little effect in this
2297     * code. The array that is passed is broken in design and should be
2298     * removed. Merging members of a struct into the first member is
2299     * trouble waiting to happen.
2300     */
2301    transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2302    transfer->write_req->bufnum = 0;
2303
2304    while (!rtems_chain_is_empty (&transfer->bds))
2305    {
2306      rtems_bdbuf_buffer* bd =
2307        (rtems_bdbuf_buffer*) rtems_chain_get (&transfer->bds);
2308
2309      bool write = false;
2310
2311      /*
2312       * If the device only accepts sequential buffers and this is not the
2313       * first buffer (the first is always sequential, and the buffer is not
2314       * sequential then put the buffer back on the transfer chain and write
2315       * the committed buffers.
2316       */
2317
2318      if (rtems_bdbuf_tracer)
2319        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2320                bd->block, transfer->write_req->bufnum,
2321                dd->phys_dev->capabilities &
2322                RTEMS_BLKDEV_CAP_MULTISECTOR_CONT ? "MULIT" : "SCAT");
2323
2324      if ((dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) &&
2325          transfer->write_req->bufnum &&
2326          (bd->block != (last_block + bufs_per_bd)))
2327      {
2328        rtems_chain_prepend (&transfer->bds, &bd->link);
2329        write = true;
2330      }
2331      else
2332      {
2333        rtems_blkdev_sg_buffer* buf;
2334        buf = &transfer->write_req->bufs[transfer->write_req->bufnum];
2335        transfer->write_req->bufnum++;
2336        buf->user   = bd;
2337        buf->block  = bd->block;
2338        buf->length = dd->block_size;
2339        buf->buffer = bd->buffer;
2340        last_block  = bd->block;
2341      }
2342
2343      /*
2344       * Perform the transfer if there are no more buffers, or the transfer
2345       * size has reached the configured max. value.
2346       */
2347
2348      if (rtems_chain_is_empty (&transfer->bds) ||
2349          (transfer->write_req->bufnum >= bdbuf_config.max_write_blocks))
2350        write = true;
2351
2352      if (write)
2353      {
2354        rtems_bdbuf_execute_transfer_request (dd, transfer->write_req, false);
2355
2356        transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2357        transfer->write_req->bufnum = 0;
2358      }
2359    }
2360
2361    if (dd != &null_disk)
2362      rtems_disk_release (dd);
2363  }
2364}
2365
2366/**
2367 * Process the modified list of buffers. There is a sync or modified list that
2368 * needs to be handled so we have a common function to do the work.
2369 *
2370 * @param dev The device to handle. If BDBUF_INVALID_DEV no device is selected
2371 * so select the device of the first buffer to be written to disk.
2372 * @param chain The modified chain to process.
2373 * @param transfer The chain to append buffers to be written too.
2374 * @param sync_active If true this is a sync operation so expire all timers.
2375 * @param update_timers If true update the timers.
2376 * @param timer_delta It update_timers is true update the timers by this
2377 *                    amount.
2378 */
2379static void
2380rtems_bdbuf_swapout_modified_processing (dev_t*               dev,
2381                                         rtems_chain_control* chain,
2382                                         rtems_chain_control* transfer,
2383                                         bool                 sync_active,
2384                                         bool                 update_timers,
2385                                         uint32_t             timer_delta)
2386{
2387  if (!rtems_chain_is_empty (chain))
2388  {
2389    rtems_chain_node* node = rtems_chain_head (chain);
2390    node = node->next;
2391
2392    while (!rtems_chain_is_tail (chain, node))
2393    {
2394      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2395
2396      /*
2397       * Check if the buffer's hold timer has reached 0. If a sync is active
2398       * or someone waits for a buffer force all the timers to 0.
2399       *
2400       * @note Lots of sync requests will skew this timer. It should be based
2401       *       on TOD to be accurate. Does it matter ?
2402       */
2403      if (sync_active || rtems_bdbuf_has_buffer_waiters ())
2404        bd->hold_timer = 0;
2405
2406      if (bd->hold_timer)
2407      {
2408        if (update_timers)
2409        {
2410          if (bd->hold_timer > timer_delta)
2411            bd->hold_timer -= timer_delta;
2412          else
2413            bd->hold_timer = 0;
2414        }
2415
2416        if (bd->hold_timer)
2417        {
2418          node = node->next;
2419          continue;
2420        }
2421      }
2422
2423      /*
2424       * This assumes we can set dev_t to BDBUF_INVALID_DEV which is just an
2425       * assumption. Cannot use the transfer list being empty the sync dev
2426       * calls sets the dev to use.
2427       */
2428      if (*dev == BDBUF_INVALID_DEV)
2429        *dev = bd->dev;
2430
2431      if (bd->dev == *dev)
2432      {
2433        rtems_chain_node* next_node = node->next;
2434        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2435
2436        /*
2437         * The blocks on the transfer list are sorted in block order. This
2438         * means multi-block transfers for drivers that require consecutive
2439         * blocks perform better with sorted blocks and for real disks it may
2440         * help lower head movement.
2441         */
2442
2443        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2444
2445        rtems_chain_extract (node);
2446
2447        tnode = tnode->previous;
2448
2449        while (node && !rtems_chain_is_head (transfer, tnode))
2450        {
2451          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2452
2453          if (bd->block > tbd->block)
2454          {
2455            rtems_chain_insert (tnode, node);
2456            node = NULL;
2457          }
2458          else
2459            tnode = tnode->previous;
2460        }
2461
2462        if (node)
2463          rtems_chain_prepend (transfer, node);
2464
2465        node = next_node;
2466      }
2467      else
2468      {
2469        node = node->next;
2470      }
2471    }
2472  }
2473}
2474
2475/**
2476 * Process the cache's modified buffers. Check the sync list first then the
2477 * modified list extracting the buffers suitable to be written to disk. We have
2478 * a device at a time. The task level loop will repeat this operation while
2479 * there are buffers to be written. If the transfer fails place the buffers
2480 * back on the modified list and try again later. The cache is unlocked while
2481 * the buffers are being written to disk.
2482 *
2483 * @param timer_delta It update_timers is true update the timers by this
2484 *                    amount.
2485 * @param update_timers If true update the timers.
2486 * @param transfer The transfer transaction data.
2487 *
2488 * @retval true Buffers where written to disk so scan again.
2489 * @retval false No buffers where written to disk.
2490 */
2491static bool
2492rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2493                                bool                          update_timers,
2494                                rtems_bdbuf_swapout_transfer* transfer)
2495{
2496  rtems_bdbuf_swapout_worker* worker;
2497  bool                        transfered_buffers = false;
2498
2499  rtems_bdbuf_lock_cache ();
2500
2501  /*
2502   * If a sync is active do not use a worker because the current code does not
2503   * cleaning up after. We need to know the buffers have been written when
2504   * syncing to the release sync lock and currently worker threads do not
2505   * return to here. We do not know the worker is the last in a sequence of
2506   * sync writes until after we have it running so we do not know to tell it to
2507   * release the lock. The simplest solution is to get the main swap out task
2508   * perform all sync operations.
2509   */
2510  if (bdbuf_cache.sync_active)
2511    worker = NULL;
2512  else
2513  {
2514    worker = (rtems_bdbuf_swapout_worker*)
2515      rtems_chain_get (&bdbuf_cache.swapout_workers);
2516    if (worker)
2517      transfer = &worker->transfer;
2518  }
2519
2520  rtems_chain_initialize_empty (&transfer->bds);
2521  transfer->dev = BDBUF_INVALID_DEV;
2522
2523  /*
2524   * When the sync is for a device limit the sync to that device. If the sync
2525   * is for a buffer handle process the devices in the order on the sync
2526   * list. This means the dev is BDBUF_INVALID_DEV.
2527   */
2528  if (bdbuf_cache.sync_active)
2529    transfer->dev = bdbuf_cache.sync_device;
2530
2531  /*
2532   * If we have any buffers in the sync queue move them to the modified
2533   * list. The first sync buffer will select the device we use.
2534   */
2535  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2536                                           &bdbuf_cache.sync,
2537                                           &transfer->bds,
2538                                           true, false,
2539                                           timer_delta);
2540
2541  /*
2542   * Process the cache's modified list.
2543   */
2544  rtems_bdbuf_swapout_modified_processing (&transfer->dev,
2545                                           &bdbuf_cache.modified,
2546                                           &transfer->bds,
2547                                           bdbuf_cache.sync_active,
2548                                           update_timers,
2549                                           timer_delta);
2550
2551  /*
2552   * We have all the buffers that have been modified for this device so the
2553   * cache can be unlocked because the state of each buffer has been set to
2554   * TRANSFER.
2555   */
2556  rtems_bdbuf_unlock_cache ();
2557
2558  /*
2559   * If there are buffers to transfer to the media transfer them.
2560   */
2561  if (!rtems_chain_is_empty (&transfer->bds))
2562  {
2563    if (worker)
2564    {
2565      rtems_status_code sc = rtems_event_send (worker->id,
2566                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2567      if (sc != RTEMS_SUCCESSFUL)
2568        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
2569    }
2570    else
2571    {
2572      rtems_bdbuf_swapout_write (transfer);
2573    }
2574
2575    transfered_buffers = true;
2576  }
2577
2578  if (bdbuf_cache.sync_active && !transfered_buffers)
2579  {
2580    rtems_id sync_requester;
2581    rtems_bdbuf_lock_cache ();
2582    sync_requester = bdbuf_cache.sync_requester;
2583    bdbuf_cache.sync_active = false;
2584    bdbuf_cache.sync_requester = 0;
2585    rtems_bdbuf_unlock_cache ();
2586    if (sync_requester)
2587      rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC);
2588  }
2589
2590  return transfered_buffers;
2591}
2592
2593/**
2594 * Allocate the write request and initialise it for good measure.
2595 *
2596 * @return rtems_blkdev_request* The write reference memory.
2597 */
2598static rtems_blkdev_request*
2599rtems_bdbuf_swapout_writereq_alloc (void)
2600{
2601  /*
2602   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
2603   * I am disappointment at finding code like this in RTEMS. The request should
2604   * have been a rtems_chain_control. Simple, fast and less storage as the node
2605   * is already part of the buffer structure.
2606   */
2607  rtems_blkdev_request* write_req =
2608    malloc (sizeof (rtems_blkdev_request) +
2609            (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer)));
2610
2611  if (!write_req)
2612    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2613
2614  write_req->req = RTEMS_BLKDEV_REQ_WRITE;
2615  write_req->req_done = rtems_bdbuf_transfer_done;
2616  write_req->done_arg = write_req;
2617  write_req->io_task = rtems_task_self ();
2618
2619  return write_req;
2620}
2621
2622/**
2623 * The swapout worker thread body.
2624 *
2625 * @param arg A pointer to the worker thread's private data.
2626 * @return rtems_task Not used.
2627 */
2628static rtems_task
2629rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2630{
2631  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2632
2633  while (worker->enabled)
2634  {
2635    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2636
2637    rtems_bdbuf_swapout_write (&worker->transfer);
2638
2639    rtems_bdbuf_lock_cache ();
2640
2641    rtems_chain_initialize_empty (&worker->transfer.bds);
2642    worker->transfer.dev = BDBUF_INVALID_DEV;
2643
2644    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2645
2646    rtems_bdbuf_unlock_cache ();
2647  }
2648
2649  free (worker->transfer.write_req);
2650  free (worker);
2651
2652  rtems_task_delete (RTEMS_SELF);
2653}
2654
2655/**
2656 * Open the swapout worker threads.
2657 */
2658static void
2659rtems_bdbuf_swapout_workers_open (void)
2660{
2661  rtems_status_code sc;
2662  size_t            w;
2663
2664  rtems_bdbuf_lock_cache ();
2665
2666  for (w = 0; w < bdbuf_config.swapout_workers; w++)
2667  {
2668    rtems_bdbuf_swapout_worker* worker;
2669
2670    worker = malloc (sizeof (rtems_bdbuf_swapout_worker));
2671    if (!worker)
2672      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2673
2674    rtems_chain_append (&bdbuf_cache.swapout_workers, &worker->link);
2675    worker->enabled = true;
2676    worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2677
2678    rtems_chain_initialize_empty (&worker->transfer.bds);
2679    worker->transfer.dev = BDBUF_INVALID_DEV;
2680
2681    sc = rtems_task_create (rtems_build_name('B', 'D', 'o', 'a' + w),
2682                            (bdbuf_config.swapout_priority ?
2683                             bdbuf_config.swapout_priority :
2684                             RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
2685                            SWAPOUT_TASK_STACK_SIZE,
2686                            RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
2687                            RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
2688                            &worker->id);
2689    if (sc != RTEMS_SUCCESSFUL)
2690      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE);
2691
2692    sc = rtems_task_start (worker->id,
2693                           rtems_bdbuf_swapout_worker_task,
2694                           (rtems_task_argument) worker);
2695    if (sc != RTEMS_SUCCESSFUL)
2696      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_START);
2697  }
2698
2699  rtems_bdbuf_unlock_cache ();
2700}
2701
2702/**
2703 * Close the swapout worker threads.
2704 */
2705static void
2706rtems_bdbuf_swapout_workers_close (void)
2707{
2708  rtems_chain_node* node;
2709
2710  rtems_bdbuf_lock_cache ();
2711
2712  node = rtems_chain_first (&bdbuf_cache.swapout_workers);
2713  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node))
2714  {
2715    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2716    worker->enabled = false;
2717    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2718    node = rtems_chain_next (node);
2719  }
2720
2721  rtems_bdbuf_unlock_cache ();
2722}
2723
2724/**
2725 * Body of task which takes care on flushing modified buffers to the disk.
2726 *
2727 * @param arg A pointer to the global cache data. Use the global variable and
2728 *            not this.
2729 * @return rtems_task Not used.
2730 */
2731static rtems_task
2732rtems_bdbuf_swapout_task (rtems_task_argument arg)
2733{
2734  rtems_bdbuf_swapout_transfer transfer;
2735  uint32_t                     period_in_ticks;
2736  const uint32_t               period_in_msecs = bdbuf_config.swapout_period;;
2737  uint32_t                     timer_delta;
2738
2739  transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2740  rtems_chain_initialize_empty (&transfer.bds);
2741  transfer.dev = BDBUF_INVALID_DEV;
2742
2743  /*
2744   * Localise the period.
2745   */
2746  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2747
2748  /*
2749   * This is temporary. Needs to be changed to use the real time clock.
2750   */
2751  timer_delta = period_in_msecs;
2752
2753  /*
2754   * Create the worker threads.
2755   */
2756  rtems_bdbuf_swapout_workers_open ();
2757
2758  while (bdbuf_cache.swapout_enabled)
2759  {
2760    rtems_event_set   out;
2761    rtems_status_code sc;
2762
2763    /*
2764     * Only update the timers once in the processing cycle.
2765     */
2766    bool update_timers = true;
2767
2768    /*
2769     * If we write buffers to any disk perform a check again. We only write a
2770     * single device at a time and the cache may have more than one device's
2771     * buffers modified waiting to be written.
2772     */
2773    bool transfered_buffers;
2774
2775    do
2776    {
2777      transfered_buffers = false;
2778
2779      /*
2780       * Extact all the buffers we find for a specific device. The device is
2781       * the first one we find on a modified list. Process the sync queue of
2782       * buffers first.
2783       */
2784      if (rtems_bdbuf_swapout_processing (timer_delta,
2785                                          update_timers,
2786                                          &transfer))
2787      {
2788        transfered_buffers = true;
2789      }
2790
2791      /*
2792       * Only update the timers once.
2793       */
2794      update_timers = false;
2795    }
2796    while (transfered_buffers);
2797
2798    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2799                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2800                              period_in_ticks,
2801                              &out);
2802
2803    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2804      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2805  }
2806
2807  rtems_bdbuf_swapout_workers_close ();
2808
2809  free (transfer.write_req);
2810
2811  rtems_task_delete (RTEMS_SELF);
2812}
Note: See TracBrowser for help on using the repository browser.