source: rtems/cpukit/libblock/src/bdbuf.c

Last change on this file was 6ae79e6, checked in by Christian Mauderer <christian.mauderer@…>, on Jan 19, 2021 at 2:33:35 PM

libblock: Add rtems_bdbuf_peek()

Adds a peek function that allows (for example) a file system to suggest
the next blocks that should be used for read ahead. This can increase
the read speed of fragmented files.

Update #3689

  • Property mode set to 100644
File size: 80.2 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (c) 2009, 2017 embedded brains GmbH.
23 */
24
25/**
26 * Set to 1 to enable debug tracing.
27 */
28#define RTEMS_BDBUF_TRACE 0
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33#include <limits.h>
34#include <errno.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include <inttypes.h>
39#include <pthread.h>
40
41#include <rtems.h>
42#include <rtems/error.h>
43#include <rtems/thread.h>
44#include <rtems/score/assert.h>
45
46#include "rtems/bdbuf.h"
47
48#define BDBUF_INVALID_DEV NULL
49
50/*
51 * Simpler label for this file.
52 */
53#define bdbuf_config rtems_bdbuf_configuration
54
55/**
56 * A swapout transfer transaction data. This data is passed to a worked thread
57 * to handle the write phase of the transfer.
58 */
59typedef struct rtems_bdbuf_swapout_transfer
60{
61  rtems_chain_control   bds;         /**< The transfer list of BDs. */
62  rtems_disk_device    *dd;          /**< The device the transfer is for. */
63  bool                  syncing;     /**< The data is a sync'ing. */
64  rtems_blkdev_request  write_req;   /**< The write request. */
65} rtems_bdbuf_swapout_transfer;
66
67/**
68 * Swapout worker thread. These are available to take processing from the
69 * main swapout thread and handle the I/O operation.
70 */
71typedef struct rtems_bdbuf_swapout_worker
72{
73  rtems_chain_node             link;     /**< The threads sit on a chain when
74                                          * idle. */
75  rtems_id                     id;       /**< The id of the task so we can wake
76                                          * it. */
77  bool                         enabled;  /**< The worker is enabled. */
78  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
79                                          * thread. */
80} rtems_bdbuf_swapout_worker;
81
82/**
83 * Buffer waiters synchronization.
84 */
85typedef struct rtems_bdbuf_waiters {
86  unsigned                 count;
87  rtems_condition_variable cond_var;
88} rtems_bdbuf_waiters;
89
90/**
91 * The BD buffer cache.
92 */
93typedef struct rtems_bdbuf_cache
94{
95  rtems_id            swapout;           /**< Swapout task ID */
96  bool                swapout_enabled;   /**< Swapout is only running if
97                                          * enabled. Set to false to kill the
98                                          * swap out task. It deletes itself. */
99  rtems_chain_control swapout_free_workers; /**< The work threads for the swapout
100                                             * task. */
101
102  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
103                                          * descriptors. */
104  void*               buffers;           /**< The buffer's memory. */
105  size_t              buffer_min_count;  /**< Number of minimum size buffers
106                                          * that fit the buffer memory. */
107  size_t              max_bds_per_group; /**< The number of BDs of minimum
108                                          * buffer size that fit in a group. */
109  uint32_t            flags;             /**< Configuration flags. */
110
111  rtems_mutex         lock;              /**< The cache lock. It locks all
112                                          * cache data, BD and lists. */
113  rtems_mutex         sync_lock;         /**< Sync calls block writes. */
114  bool                sync_active;       /**< True if a sync is active. */
115  rtems_id            sync_requester;    /**< The sync requester. */
116  rtems_disk_device  *sync_device;       /**< The device to sync and
117                                          * BDBUF_INVALID_DEV not a device
118                                          * sync. */
119
120  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
121                                          * root. There is only one. */
122  rtems_chain_control lru;               /**< Least recently used list */
123  rtems_chain_control modified;          /**< Modified buffers list */
124  rtems_chain_control sync;              /**< Buffers to sync list */
125
126  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
127                                          * ACCESS_CACHED, ACCESS_MODIFIED or
128                                          * ACCESS_EMPTY
129                                          * state. */
130  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
131                                          * state. */
132  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
133                                          * available. */
134
135  rtems_bdbuf_swapout_transfer *swapout_transfer;
136  rtems_bdbuf_swapout_worker *swapout_workers;
137
138  size_t              group_count;       /**< The number of groups. */
139  rtems_bdbuf_group*  groups;            /**< The groups. */
140  rtems_id            read_ahead_task;   /**< Read-ahead task */
141  rtems_chain_control read_ahead_chain;  /**< Read-ahead request chain */
142  bool                read_ahead_enabled; /**< Read-ahead enabled */
143  rtems_status_code   init_status;       /**< The initialization status */
144  pthread_once_t      once;
145} rtems_bdbuf_cache;
146
147typedef enum {
148  RTEMS_BDBUF_FATAL_CACHE_WAIT_2,
149  RTEMS_BDBUF_FATAL_CACHE_WAIT_TO,
150  RTEMS_BDBUF_FATAL_CACHE_WAKE,
151  RTEMS_BDBUF_FATAL_PREEMPT_DIS,
152  RTEMS_BDBUF_FATAL_PREEMPT_RST,
153  RTEMS_BDBUF_FATAL_RA_WAKE_UP,
154  RTEMS_BDBUF_FATAL_RECYCLE,
155  RTEMS_BDBUF_FATAL_SO_WAKE_1,
156  RTEMS_BDBUF_FATAL_SO_WAKE_2,
157  RTEMS_BDBUF_FATAL_STATE_0,
158  RTEMS_BDBUF_FATAL_STATE_2,
159  RTEMS_BDBUF_FATAL_STATE_4,
160  RTEMS_BDBUF_FATAL_STATE_5,
161  RTEMS_BDBUF_FATAL_STATE_6,
162  RTEMS_BDBUF_FATAL_STATE_7,
163  RTEMS_BDBUF_FATAL_STATE_8,
164  RTEMS_BDBUF_FATAL_STATE_9,
165  RTEMS_BDBUF_FATAL_STATE_10,
166  RTEMS_BDBUF_FATAL_STATE_11,
167  RTEMS_BDBUF_FATAL_SWAPOUT_RE,
168  RTEMS_BDBUF_FATAL_TREE_RM,
169  RTEMS_BDBUF_FATAL_WAIT_EVNT,
170  RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT
171} rtems_bdbuf_fatal_code;
172
173/**
174 * The events used in this code. These should be system events rather than
175 * application events.
176 */
177#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
178#define RTEMS_BDBUF_READ_AHEAD_WAKE_UP RTEMS_EVENT_1
179
180static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
181
182static rtems_task rtems_bdbuf_read_ahead_task(rtems_task_argument arg);
183
184/**
185 * The Buffer Descriptor cache.
186 */
187static rtems_bdbuf_cache bdbuf_cache = {
188  .lock = RTEMS_MUTEX_INITIALIZER(NULL),
189  .sync_lock = RTEMS_MUTEX_INITIALIZER(NULL),
190  .access_waiters = { .cond_var = RTEMS_CONDITION_VARIABLE_INITIALIZER(NULL) },
191  .transfer_waiters = {
192    .cond_var = RTEMS_CONDITION_VARIABLE_INITIALIZER(NULL)
193  },
194  .buffer_waiters = { .cond_var = RTEMS_CONDITION_VARIABLE_INITIALIZER(NULL) },
195  .once = PTHREAD_ONCE_INIT
196};
197
198#if RTEMS_BDBUF_TRACE
199/**
200 * If true output the trace message.
201 */
202bool rtems_bdbuf_tracer;
203
204/**
205 * Return the number of items on the list.
206 *
207 * @param list The chain control.
208 * @return uint32_t The number of items on the list.
209 */
210uint32_t
211rtems_bdbuf_list_count (rtems_chain_control* list)
212{
213  rtems_chain_node* node = rtems_chain_first (list);
214  uint32_t          count = 0;
215  while (!rtems_chain_is_tail (list, node))
216  {
217    count++;
218    node = rtems_chain_next (node);
219  }
220  return count;
221}
222
223/**
224 * Show the usage for the bdbuf cache.
225 */
226void
227rtems_bdbuf_show_usage (void)
228{
229  uint32_t group;
230  uint32_t total = 0;
231  uint32_t val;
232
233  for (group = 0; group < bdbuf_cache.group_count; group++)
234    total += bdbuf_cache.groups[group].users;
235  printf ("bdbuf:group users=%lu", total);
236  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
237  printf (", lru=%lu", val);
238  total = val;
239  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
240  printf (", mod=%lu", val);
241  total += val;
242  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
243  printf (", sync=%lu", val);
244  total += val;
245  printf (", total=%lu\n", total);
246}
247
248/**
249 * Show the users for a group of a bd.
250 *
251 * @param where A label to show the context of output.
252 * @param bd The bd to show the users of.
253 */
254void
255rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
256{
257  const char* states[] =
258    { "FR", "EM", "CH", "AC", "AM", "AE", "AP", "MD", "SY", "TR", "TP" };
259
260  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
261          where,
262          bd->block, states[bd->state],
263          bd->group - bdbuf_cache.groups,
264          bd - bdbuf_cache.bds,
265          bd->group->users,
266          bd->group->users > 8 ? "<<<<<<<" : "");
267}
268#else
269#define rtems_bdbuf_tracer (0)
270#define rtems_bdbuf_show_usage() ((void) 0)
271#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
272#endif
273
274/**
275 * The default maximum height of 32 allows for AVL trees having between
276 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
277 * change this compile-time constant as you wish.
278 */
279#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
280#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
281#endif
282
283static void
284rtems_bdbuf_fatal (rtems_fatal_code error)
285{
286  rtems_fatal (RTEMS_FATAL_SOURCE_BDBUF, error);
287}
288
289static void
290rtems_bdbuf_fatal_with_state (rtems_bdbuf_buf_state state,
291                              rtems_bdbuf_fatal_code error)
292{
293  rtems_bdbuf_fatal ((((uint32_t) state) << 16) | error);
294}
295
296/**
297 * Searches for the node with specified dd/block.
298 *
299 * @param root pointer to the root node of the AVL-Tree
300 * @param dd disk device search key
301 * @param block block search key
302 * @retval NULL node with the specified dd/block is not found
303 * @return pointer to the node with specified dd/block
304 */
305static rtems_bdbuf_buffer *
306rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
307                        const rtems_disk_device *dd,
308                        rtems_blkdev_bnum    block)
309{
310  rtems_bdbuf_buffer* p = *root;
311
312  while ((p != NULL) && ((p->dd != dd) || (p->block != block)))
313  {
314    if (((uintptr_t) p->dd < (uintptr_t) dd)
315        || ((p->dd == dd) && (p->block < block)))
316    {
317      p = p->avl.right;
318    }
319    else
320    {
321      p = p->avl.left;
322    }
323  }
324
325  return p;
326}
327
328/**
329 * Inserts the specified node to the AVl-Tree.
330 *
331 * @param root pointer to the root node of the AVL-Tree
332 * @param node Pointer to the node to add.
333 * @retval 0 The node added successfully
334 * @retval -1 An error occured
335 */
336static int
337rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
338                       rtems_bdbuf_buffer*  node)
339{
340  const rtems_disk_device *dd = node->dd;
341  rtems_blkdev_bnum block = node->block;
342
343  rtems_bdbuf_buffer*  p = *root;
344  rtems_bdbuf_buffer*  q;
345  rtems_bdbuf_buffer*  p1;
346  rtems_bdbuf_buffer*  p2;
347  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
348  rtems_bdbuf_buffer** buf_prev = buf_stack;
349
350  bool modified = false;
351
352  if (p == NULL)
353  {
354    *root = node;
355    node->avl.left = NULL;
356    node->avl.right = NULL;
357    node->avl.bal = 0;
358    return 0;
359  }
360
361  while (p != NULL)
362  {
363    *buf_prev++ = p;
364
365    if (((uintptr_t) p->dd < (uintptr_t) dd)
366        || ((p->dd == dd) && (p->block < block)))
367    {
368      p->avl.cache = 1;
369      q = p->avl.right;
370      if (q == NULL)
371      {
372        q = node;
373        p->avl.right = q = node;
374        break;
375      }
376    }
377    else if ((p->dd != dd) || (p->block != block))
378    {
379      p->avl.cache = -1;
380      q = p->avl.left;
381      if (q == NULL)
382      {
383        q = node;
384        p->avl.left = q;
385        break;
386      }
387    }
388    else
389    {
390      return -1;
391    }
392
393    p = q;
394  }
395
396  q->avl.left = q->avl.right = NULL;
397  q->avl.bal = 0;
398  modified = true;
399  buf_prev--;
400
401  while (modified)
402  {
403    if (p->avl.cache == -1)
404    {
405      switch (p->avl.bal)
406      {
407        case 1:
408          p->avl.bal = 0;
409          modified = false;
410          break;
411
412        case 0:
413          p->avl.bal = -1;
414          break;
415
416        case -1:
417          p1 = p->avl.left;
418          if (p1->avl.bal == -1) /* simple LL-turn */
419          {
420            p->avl.left = p1->avl.right;
421            p1->avl.right = p;
422            p->avl.bal = 0;
423            p = p1;
424          }
425          else /* double LR-turn */
426          {
427            p2 = p1->avl.right;
428            p1->avl.right = p2->avl.left;
429            p2->avl.left = p1;
430            p->avl.left = p2->avl.right;
431            p2->avl.right = p;
432            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
433            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
434            p = p2;
435          }
436          p->avl.bal = 0;
437          modified = false;
438          break;
439
440        default:
441          break;
442      }
443    }
444    else
445    {
446      switch (p->avl.bal)
447      {
448        case -1:
449          p->avl.bal = 0;
450          modified = false;
451          break;
452
453        case 0:
454          p->avl.bal = 1;
455          break;
456
457        case 1:
458          p1 = p->avl.right;
459          if (p1->avl.bal == 1) /* simple RR-turn */
460          {
461            p->avl.right = p1->avl.left;
462            p1->avl.left = p;
463            p->avl.bal = 0;
464            p = p1;
465          }
466          else /* double RL-turn */
467          {
468            p2 = p1->avl.left;
469            p1->avl.left = p2->avl.right;
470            p2->avl.right = p1;
471            p->avl.right = p2->avl.left;
472            p2->avl.left = p;
473            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
474            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
475            p = p2;
476          }
477          p->avl.bal = 0;
478          modified = false;
479          break;
480
481        default:
482          break;
483      }
484    }
485    q = p;
486    if (buf_prev > buf_stack)
487    {
488      p = *--buf_prev;
489
490      if (p->avl.cache == -1)
491      {
492        p->avl.left = q;
493      }
494      else
495      {
496        p->avl.right = q;
497      }
498    }
499    else
500    {
501      *root = p;
502      break;
503    }
504  };
505
506  return 0;
507}
508
509
510/**
511 * Removes the node from the tree.
512 *
513 * @param root Pointer to pointer to the root node
514 * @param node Pointer to the node to remove
515 * @retval 0 Item removed
516 * @retval -1 No such item found
517 */
518static int
519rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
520                       const rtems_bdbuf_buffer* node)
521{
522  const rtems_disk_device *dd = node->dd;
523  rtems_blkdev_bnum block = node->block;
524
525  rtems_bdbuf_buffer*  p = *root;
526  rtems_bdbuf_buffer*  q;
527  rtems_bdbuf_buffer*  r;
528  rtems_bdbuf_buffer*  s;
529  rtems_bdbuf_buffer*  p1;
530  rtems_bdbuf_buffer*  p2;
531  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
532  rtems_bdbuf_buffer** buf_prev = buf_stack;
533
534  bool modified = false;
535
536  memset (buf_stack, 0, sizeof(buf_stack));
537
538  while (p != NULL)
539  {
540    *buf_prev++ = p;
541
542    if (((uintptr_t) p->dd < (uintptr_t) dd)
543        || ((p->dd == dd) && (p->block < block)))
544    {
545      p->avl.cache = 1;
546      p = p->avl.right;
547    }
548    else if ((p->dd != dd) || (p->block != block))
549    {
550      p->avl.cache = -1;
551      p = p->avl.left;
552    }
553    else
554    {
555      /* node found */
556      break;
557    }
558  }
559
560  if (p == NULL)
561  {
562    /* there is no such node */
563    return -1;
564  }
565
566  q = p;
567
568  buf_prev--;
569  if (buf_prev > buf_stack)
570  {
571    p = *(buf_prev - 1);
572  }
573  else
574  {
575    p = NULL;
576  }
577
578  /* at this moment q - is a node to delete, p is q's parent */
579  if (q->avl.right == NULL)
580  {
581    r = q->avl.left;
582    if (r != NULL)
583    {
584      r->avl.bal = 0;
585    }
586    q = r;
587  }
588  else
589  {
590    rtems_bdbuf_buffer **t;
591
592    r = q->avl.right;
593
594    if (r->avl.left == NULL)
595    {
596      r->avl.left = q->avl.left;
597      r->avl.bal = q->avl.bal;
598      r->avl.cache = 1;
599      *buf_prev++ = q = r;
600    }
601    else
602    {
603      t = buf_prev++;
604      s = r;
605
606      while (s->avl.left != NULL)
607      {
608        *buf_prev++ = r = s;
609        s = r->avl.left;
610        r->avl.cache = -1;
611      }
612
613      s->avl.left = q->avl.left;
614      r->avl.left = s->avl.right;
615      s->avl.right = q->avl.right;
616      s->avl.bal = q->avl.bal;
617      s->avl.cache = 1;
618
619      *t = q = s;
620    }
621  }
622
623  if (p != NULL)
624  {
625    if (p->avl.cache == -1)
626    {
627      p->avl.left = q;
628    }
629    else
630    {
631      p->avl.right = q;
632    }
633  }
634  else
635  {
636    *root = q;
637  }
638
639  modified = true;
640
641  while (modified)
642  {
643    if (buf_prev > buf_stack)
644    {
645      p = *--buf_prev;
646    }
647    else
648    {
649      break;
650    }
651
652    if (p->avl.cache == -1)
653    {
654      /* rebalance left branch */
655      switch (p->avl.bal)
656      {
657        case -1:
658          p->avl.bal = 0;
659          break;
660        case  0:
661          p->avl.bal = 1;
662          modified = false;
663          break;
664
665        case +1:
666          p1 = p->avl.right;
667
668          if (p1->avl.bal >= 0) /* simple RR-turn */
669          {
670            p->avl.right = p1->avl.left;
671            p1->avl.left = p;
672
673            if (p1->avl.bal == 0)
674            {
675              p1->avl.bal = -1;
676              modified = false;
677            }
678            else
679            {
680              p->avl.bal = 0;
681              p1->avl.bal = 0;
682            }
683            p = p1;
684          }
685          else /* double RL-turn */
686          {
687            p2 = p1->avl.left;
688
689            p1->avl.left = p2->avl.right;
690            p2->avl.right = p1;
691            p->avl.right = p2->avl.left;
692            p2->avl.left = p;
693
694            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
695            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;
696
697            p = p2;
698            p2->avl.bal = 0;
699          }
700          break;
701
702        default:
703          break;
704      }
705    }
706    else
707    {
708      /* rebalance right branch */
709      switch (p->avl.bal)
710      {
711        case +1:
712          p->avl.bal = 0;
713          break;
714
715        case  0:
716          p->avl.bal = -1;
717          modified = false;
718          break;
719
720        case -1:
721          p1 = p->avl.left;
722
723          if (p1->avl.bal <= 0) /* simple LL-turn */
724          {
725            p->avl.left = p1->avl.right;
726            p1->avl.right = p;
727            if (p1->avl.bal == 0)
728            {
729              p1->avl.bal = 1;
730              modified = false;
731            }
732            else
733            {
734              p->avl.bal = 0;
735              p1->avl.bal = 0;
736            }
737            p = p1;
738          }
739          else /* double LR-turn */
740          {
741            p2 = p1->avl.right;
742
743            p1->avl.right = p2->avl.left;
744            p2->avl.left = p1;
745            p->avl.left = p2->avl.right;
746            p2->avl.right = p;
747
748            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
749            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
750
751            p = p2;
752            p2->avl.bal = 0;
753          }
754          break;
755
756        default:
757          break;
758      }
759    }
760
761    if (buf_prev > buf_stack)
762    {
763      q = *(buf_prev - 1);
764
765      if (q->avl.cache == -1)
766      {
767        q->avl.left = p;
768      }
769      else
770      {
771        q->avl.right = p;
772      }
773    }
774    else
775    {
776      *root = p;
777      break;
778    }
779
780  }
781
782  return 0;
783}
784
785static void
786rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
787{
788  bd->state = state;
789}
790
791static rtems_blkdev_bnum
792rtems_bdbuf_media_block (const rtems_disk_device *dd, rtems_blkdev_bnum block)
793{
794  if (dd->block_to_media_block_shift >= 0)
795    return block << dd->block_to_media_block_shift;
796  else
797    /*
798     * Change the block number for the block size to the block number for the media
799     * block size. We have to use 64bit maths. There is no short cut here.
800     */
801    return (rtems_blkdev_bnum)
802      ((((uint64_t) block) * dd->block_size) / dd->media_block_size);
803}
804
805/**
806 * Lock the mutex. A single task can nest calls.
807 *
808 * @param lock The mutex to lock.
809 */
810static void
811rtems_bdbuf_lock (rtems_mutex *lock)
812{
813  rtems_mutex_lock (lock);
814}
815
816/**
817 * Unlock the mutex.
818 *
819 * @param lock The mutex to unlock.
820 */
821static void
822rtems_bdbuf_unlock (rtems_mutex *lock)
823{
824  rtems_mutex_unlock (lock);
825}
826
827/**
828 * Lock the cache. A single task can nest calls.
829 */
830static void
831rtems_bdbuf_lock_cache (void)
832{
833  rtems_bdbuf_lock (&bdbuf_cache.lock);
834}
835
836/**
837 * Unlock the cache.
838 */
839static void
840rtems_bdbuf_unlock_cache (void)
841{
842  rtems_bdbuf_unlock (&bdbuf_cache.lock);
843}
844
845/**
846 * Lock the cache's sync. A single task can nest calls.
847 */
848static void
849rtems_bdbuf_lock_sync (void)
850{
851  rtems_bdbuf_lock (&bdbuf_cache.sync_lock);
852}
853
854/**
855 * Unlock the cache's sync lock. Any blocked writers are woken.
856 */
857static void
858rtems_bdbuf_unlock_sync (void)
859{
860  rtems_bdbuf_unlock (&bdbuf_cache.sync_lock);
861}
862
863static void
864rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
865{
866  ++bd->group->users;
867}
868
869static void
870rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
871{
872  --bd->group->users;
873}
874
875/**
876 * Wait until woken. Semaphores are used so a number of tasks can wait and can
877 * be woken at once. Task events would require we maintain a list of tasks to
878 * be woken and this would require storage and we do not know the number of
879 * tasks that could be waiting.
880 *
881 * While we have the cache locked we can try and claim the semaphore and
882 * therefore know when we release the lock to the cache we will block until the
883 * semaphore is released. This may even happen before we get to block.
884 *
885 * A counter is used to save the release call when no one is waiting.
886 *
887 * The function assumes the cache is locked on entry and it will be locked on
888 * exit.
889 */
890static void
891rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
892{
893  /*
894   * Indicate we are waiting.
895   */
896  ++waiters->count;
897
898  rtems_condition_variable_wait (&waiters->cond_var, &bdbuf_cache.lock);
899
900  --waiters->count;
901}
902
903static void
904rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
905{
906  rtems_bdbuf_group_obtain (bd);
907  ++bd->waiters;
908  rtems_bdbuf_anonymous_wait (waiters);
909  --bd->waiters;
910  rtems_bdbuf_group_release (bd);
911}
912
913/**
914 * Wake a blocked resource. The resource has a counter that lets us know if
915 * there are any waiters.
916 */
917static void
918rtems_bdbuf_wake (rtems_bdbuf_waiters *waiters)
919{
920  if (waiters->count > 0)
921  {
922    rtems_condition_variable_broadcast (&waiters->cond_var);
923  }
924}
925
926static void
927rtems_bdbuf_wake_swapper (void)
928{
929  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
930                                           RTEMS_BDBUF_SWAPOUT_SYNC);
931  if (sc != RTEMS_SUCCESSFUL)
932    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_1);
933}
934
935static bool
936rtems_bdbuf_has_buffer_waiters (void)
937{
938  return bdbuf_cache.buffer_waiters.count;
939}
940
941static void
942rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
943{
944  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
945    rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_TREE_RM);
946}
947
948static void
949rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
950{
951  switch (bd->state)
952  {
953    case RTEMS_BDBUF_STATE_FREE:
954      break;
955    case RTEMS_BDBUF_STATE_CACHED:
956      rtems_bdbuf_remove_from_tree (bd);
957      break;
958    default:
959      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_10);
960  }
961
962  rtems_chain_extract_unprotected (&bd->link);
963}
964
965static void
966rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
967{
968  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
969  rtems_chain_prepend_unprotected (&bdbuf_cache.lru, &bd->link);
970}
971
972static void
973rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
974{
975  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
976}
977
978static void
979rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
980{
981  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
982  rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
983}
984
985static void
986rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
987{
988  rtems_bdbuf_make_empty (bd);
989
990  if (bd->waiters == 0)
991  {
992    rtems_bdbuf_remove_from_tree (bd);
993    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
994  }
995}
996
997static void
998rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
999{
1000  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dd)
1001  {
1002    rtems_bdbuf_unlock_cache ();
1003
1004    /*
1005     * Wait for the sync lock.
1006     */
1007    rtems_bdbuf_lock_sync ();
1008
1009    rtems_bdbuf_unlock_sync ();
1010    rtems_bdbuf_lock_cache ();
1011  }
1012
1013  /*
1014   * Only the first modified release sets the timer and any further user
1015   * accesses do not change the timer value which should move down. This
1016   * assumes the user's hold of the buffer is much less than the time on the
1017   * modified list. Resetting the timer on each access which could result in a
1018   * buffer never getting to 0 and never being forced onto disk. This raises a
1019   * difficult question. Is a snapshot of a block that is changing better than
1020   * nothing being written? We have tended to think we should hold changes for
1021   * only a specific period of time even if still changing and get onto disk
1022   * and letting the file system try and recover this position if it can.
1023   */
1024  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
1025        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
1026    bd->hold_timer = bdbuf_config.swap_block_hold;
1027
1028  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
1029  rtems_chain_append_unprotected (&bdbuf_cache.modified, &bd->link);
1030
1031  if (bd->waiters)
1032    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1033  else if (rtems_bdbuf_has_buffer_waiters ())
1034    rtems_bdbuf_wake_swapper ();
1035}
1036
1037static void
1038rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
1039{
1040  rtems_bdbuf_group_release (bd);
1041  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
1042
1043  if (bd->waiters)
1044    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1045  else
1046    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1047}
1048
1049/**
1050 * Compute the number of BDs per group for a given buffer size.
1051 *
1052 * @param size The buffer size. It can be any size and we scale up.
1053 */
1054static size_t
1055rtems_bdbuf_bds_per_group (size_t size)
1056{
1057  size_t bufs_per_size;
1058  size_t bds_per_size;
1059
1060  if (size > bdbuf_config.buffer_max)
1061    return 0;
1062
1063  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1064
1065  for (bds_per_size = 1;
1066       bds_per_size < bufs_per_size;
1067       bds_per_size <<= 1)
1068    ;
1069
1070  return bdbuf_cache.max_bds_per_group / bds_per_size;
1071}
1072
1073static void
1074rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
1075{
1076  rtems_bdbuf_group_release (bd);
1077  rtems_bdbuf_discard_buffer (bd);
1078
1079  if (bd->waiters)
1080    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1081  else
1082    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1083}
1084
1085/**
1086 * Reallocate a group. The BDs currently allocated in the group are removed
1087 * from the ALV tree and any lists then the new BD's are prepended to the ready
1088 * list of the cache.
1089 *
1090 * @param group The group to reallocate.
1091 * @param new_bds_per_group The new count of BDs per group.
1092 * @return A buffer of this group.
1093 */
1094static rtems_bdbuf_buffer *
1095rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1096{
1097  rtems_bdbuf_buffer* bd;
1098  size_t              b;
1099  size_t              bufs_per_bd;
1100
1101  if (rtems_bdbuf_tracer)
1102    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1103            group - bdbuf_cache.groups, group->bds_per_group,
1104            new_bds_per_group);
1105
1106  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1107
1108  for (b = 0, bd = group->bdbuf;
1109       b < group->bds_per_group;
1110       b++, bd += bufs_per_bd)
1111    rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1112
1113  group->bds_per_group = new_bds_per_group;
1114  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1115
1116  for (b = 1, bd = group->bdbuf + bufs_per_bd;
1117       b < group->bds_per_group;
1118       b++, bd += bufs_per_bd)
1119    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1120
1121  if (b > 1)
1122    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1123
1124  return group->bdbuf;
1125}
1126
1127static void
1128rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
1129                                rtems_disk_device  *dd,
1130                                rtems_blkdev_bnum   block)
1131{
1132  bd->dd        = dd ;
1133  bd->block     = block;
1134  bd->avl.left  = NULL;
1135  bd->avl.right = NULL;
1136  bd->waiters   = 0;
1137
1138  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1139    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RECYCLE);
1140
1141  rtems_bdbuf_make_empty (bd);
1142}
1143
1144static rtems_bdbuf_buffer *
1145rtems_bdbuf_get_buffer_from_lru_list (rtems_disk_device *dd,
1146                                      rtems_blkdev_bnum  block)
1147{
1148  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);
1149
1150  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
1151  {
1152    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
1153    rtems_bdbuf_buffer *empty_bd = NULL;
1154
1155    if (rtems_bdbuf_tracer)
1156      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
1157              bd - bdbuf_cache.bds,
1158              bd->group - bdbuf_cache.groups, bd->group->users,
1159              bd->group->bds_per_group, dd->bds_per_group);
1160
1161    /*
1162     * If nobody waits for this BD, we may recycle it.
1163     */
1164    if (bd->waiters == 0)
1165    {
1166      if (bd->group->bds_per_group == dd->bds_per_group)
1167      {
1168        rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1169
1170        empty_bd = bd;
1171      }
1172      else if (bd->group->users == 0)
1173        empty_bd = rtems_bdbuf_group_realloc (bd->group, dd->bds_per_group);
1174    }
1175
1176    if (empty_bd != NULL)
1177    {
1178      rtems_bdbuf_setup_empty_buffer (empty_bd, dd, block);
1179
1180      return empty_bd;
1181    }
1182
1183    node = rtems_chain_next (node);
1184  }
1185
1186  return NULL;
1187}
1188
1189static rtems_status_code
1190rtems_bdbuf_create_task(
1191  rtems_name name,
1192  rtems_task_priority priority,
1193  rtems_task_priority default_priority,
1194  rtems_id *id
1195)
1196{
1197  rtems_status_code sc;
1198  size_t stack_size = bdbuf_config.task_stack_size ?
1199    bdbuf_config.task_stack_size : RTEMS_BDBUF_TASK_STACK_SIZE_DEFAULT;
1200
1201  priority = priority != 0 ? priority : default_priority;
1202
1203  sc = rtems_task_create (name,
1204                          priority,
1205                          stack_size,
1206                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1207                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1208                          id);
1209
1210  return sc;
1211}
1212
1213static rtems_bdbuf_swapout_transfer*
1214rtems_bdbuf_swapout_transfer_alloc (void)
1215{
1216  /*
1217   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
1218   * I am disappointment at finding code like this in RTEMS. The request should
1219   * have been a rtems_chain_control. Simple, fast and less storage as the node
1220   * is already part of the buffer structure.
1221   */
1222  size_t transfer_size = sizeof (rtems_bdbuf_swapout_transfer)
1223    + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
1224  return calloc (1, transfer_size);
1225}
1226
1227static void
1228rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status);
1229
1230static void
1231rtems_bdbuf_swapout_transfer_init (rtems_bdbuf_swapout_transfer* transfer,
1232                                   rtems_id id)
1233{
1234  rtems_chain_initialize_empty (&transfer->bds);
1235  transfer->dd = BDBUF_INVALID_DEV;
1236  transfer->syncing = false;
1237  transfer->write_req.req = RTEMS_BLKDEV_REQ_WRITE;
1238  transfer->write_req.done = rtems_bdbuf_transfer_done;
1239  transfer->write_req.io_task = id;
1240}
1241
1242static size_t
1243rtems_bdbuf_swapout_worker_size (void)
1244{
1245  return sizeof (rtems_bdbuf_swapout_worker)
1246    + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
1247}
1248
1249static rtems_task
1250rtems_bdbuf_swapout_worker_task (rtems_task_argument arg);
1251
/**
 * Create and start the configured number of swapout worker tasks.  The
 * storage for all workers (each including its scatter/gather buffer array) is
 * allocated as one zero-initialised chunk anchored at
 * bdbuf_cache.swapout_workers.
 *
 * On partial failure the already created tasks and the worker storage are
 * released by the caller's error path (see rtems_bdbuf_do_init()).
 *
 * @retval RTEMS_SUCCESSFUL All workers created and started.
 * @retval RTEMS_NO_MEMORY The worker storage allocation failed.
 * @return Otherwise the status of the failing task create or start.
 */
static rtems_status_code
rtems_bdbuf_swapout_workers_create (void)
{
  rtems_status_code  sc;
  size_t             w;
  size_t             worker_size;
  char              *worker_current;

  worker_size = rtems_bdbuf_swapout_worker_size ();
  worker_current = calloc (1, bdbuf_config.swapout_workers * worker_size);
  sc = worker_current != NULL ? RTEMS_SUCCESSFUL : RTEMS_NO_MEMORY;

  bdbuf_cache.swapout_workers = (rtems_bdbuf_swapout_worker *) worker_current;

  for (w = 0;
       sc == RTEMS_SUCCESSFUL && w < bdbuf_config.swapout_workers;
       w++, worker_current += worker_size)
  {
    rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;

    /* Worker tasks are named 'BDoa', 'BDob', ... in creation order. */
    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'D', 'o', 'a' + w),
                                  bdbuf_config.swapout_worker_priority,
                                  RTEMS_BDBUF_SWAPOUT_WORKER_TASK_PRIORITY_DEFAULT,
                                  &worker->id);
    if (sc == RTEMS_SUCCESSFUL)
    {
      rtems_bdbuf_swapout_transfer_init (&worker->transfer, worker->id);

      /* The worker must be on the free list and enabled before it starts. */
      rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);
      worker->enabled = true;

      sc = rtems_task_start (worker->id,
                             rtems_bdbuf_swapout_worker_task,
                             (rtems_task_argument) worker);
    }
  }

  return sc;
}
1291
1292static size_t
1293rtems_bdbuf_read_request_size (uint32_t transfer_count)
1294{
1295  return sizeof (rtems_blkdev_request)
1296    + sizeof (rtems_blkdev_sg_buffer) * transfer_count;
1297}
1298
/**
 * Initialise the buffer cache: validate the configuration, set up the cache
 * lists, locks and waiter condition variables, allocate the buffer
 * descriptors, groups and buffer memory, and create and start the swapout
 * task, the optional swapout workers and the optional read-ahead task.
 *
 * Called exactly once via pthread_once() from rtems_bdbuf_init().
 *
 * @retval RTEMS_SUCCESSFUL Cache initialised and all tasks started.
 * @retval RTEMS_CALLED_FROM_ISR Called from interrupt context.
 * @retval RTEMS_INVALID_NUMBER Inconsistent configuration values.
 * @retval RTEMS_UNSATISFIED An allocation or task create/start failed; all
 *         partially created resources are released again.
 */
static rtems_status_code
rtems_bdbuf_do_init (void)
{
  rtems_bdbuf_group*  group;
  rtems_bdbuf_buffer* bd;
  uint8_t*            buffer;
  size_t              b;
  rtems_status_code   sc;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:init\n");

  if (rtems_interrupt_is_in_progress())
    return RTEMS_CALLED_FROM_ISR;

  /*
   * Check the configuration table values.
   */

  /* The maximum buffer size must be a multiple of the minimum one. */
  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
    return RTEMS_INVALID_NUMBER;

  /*
   * The read request is allocated on the stack of the reading task (see
   * rtems_bdbuf_execute_read_request()), so bound its size.
   */
  if (rtems_bdbuf_read_request_size (bdbuf_config.max_read_ahead_blocks)
      > RTEMS_MINIMUM_STACK_SIZE / 8U)
    return RTEMS_INVALID_NUMBER;

  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;

  rtems_chain_initialize_empty (&bdbuf_cache.swapout_free_workers);
  rtems_chain_initialize_empty (&bdbuf_cache.lru);
  rtems_chain_initialize_empty (&bdbuf_cache.modified);
  rtems_chain_initialize_empty (&bdbuf_cache.sync);
  rtems_chain_initialize_empty (&bdbuf_cache.read_ahead_chain);

  rtems_mutex_set_name (&bdbuf_cache.lock, "bdbuf lock");
  rtems_mutex_set_name (&bdbuf_cache.sync_lock, "bdbuf sync lock");
  rtems_condition_variable_set_name (&bdbuf_cache.access_waiters.cond_var,
                                     "bdbuf access");
  rtems_condition_variable_set_name (&bdbuf_cache.transfer_waiters.cond_var,
                                     "bdbuf transfer");
  rtems_condition_variable_set_name (&bdbuf_cache.buffer_waiters.cond_var,
                                     "bdbuf buffer");

  rtems_bdbuf_lock_cache ();

  /*
   * Compute the various number of elements in the cache.
   */
  bdbuf_cache.buffer_min_count =
    bdbuf_config.size / bdbuf_config.buffer_min;
  bdbuf_cache.max_bds_per_group =
    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
  bdbuf_cache.group_count =
    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;

  /*
   * Allocate the memory for the buffer descriptors.
   */
  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
                            bdbuf_cache.buffer_min_count);
  if (!bdbuf_cache.bds)
    goto error;

  /*
   * Allocate the memory for the groups.
   */
  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
                               bdbuf_cache.group_count);
  if (!bdbuf_cache.groups)
    goto error;

  /*
   * Allocate memory for buffer memory. The buffer memory will be cache
   * aligned. It is possible to free the memory allocated by
   * rtems_cache_aligned_malloc() with free().
   */
  bdbuf_cache.buffers = rtems_cache_aligned_malloc(bdbuf_cache.buffer_min_count
                                                   * bdbuf_config.buffer_min);
  if (bdbuf_cache.buffers == NULL)
    goto error;

  /*
   * The cache is empty after opening so we need to add all the buffers to it
   * and initialise the groups.
   */
  for (b = 0, group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
       b < bdbuf_cache.buffer_min_count;
       b++, bd++, buffer += bdbuf_config.buffer_min)
  {
    bd->dd    = BDBUF_INVALID_DEV;
    bd->group  = group;
    bd->buffer = buffer;

    rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);

    /* Advance to the next group after its last BD has been assigned. */
    if ((b % bdbuf_cache.max_bds_per_group) ==
        (bdbuf_cache.max_bds_per_group - 1))
      group++;
  }

  for (b = 0,
         group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds;
       b < bdbuf_cache.group_count;
       b++,
         group++,
         bd += bdbuf_cache.max_bds_per_group)
  {
    group->bds_per_group = bdbuf_cache.max_bds_per_group;
    group->bdbuf = bd;
  }

  /*
   * Create and start swapout task.
   */

  bdbuf_cache.swapout_transfer = rtems_bdbuf_swapout_transfer_alloc ();
  if (!bdbuf_cache.swapout_transfer)
    goto error;

  bdbuf_cache.swapout_enabled = true;

  sc = rtems_bdbuf_create_task (rtems_build_name('B', 'S', 'W', 'P'),
                                bdbuf_config.swapout_priority,
                                RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
                                &bdbuf_cache.swapout);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  rtems_bdbuf_swapout_transfer_init (bdbuf_cache.swapout_transfer, bdbuf_cache.swapout);

  sc = rtems_task_start (bdbuf_cache.swapout,
                         rtems_bdbuf_swapout_task,
                         (rtems_task_argument) bdbuf_cache.swapout_transfer);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  if (bdbuf_config.swapout_workers > 0)
  {
    sc = rtems_bdbuf_swapout_workers_create ();
    if (sc != RTEMS_SUCCESSFUL)
      goto error;
  }

  if (bdbuf_config.max_read_ahead_blocks > 0)
  {
    bdbuf_cache.read_ahead_enabled = true;
    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'R', 'D', 'A'),
                                  bdbuf_config.read_ahead_priority,
                                  RTEMS_BDBUF_READ_AHEAD_TASK_PRIORITY_DEFAULT,
                                  &bdbuf_cache.read_ahead_task);
    if (sc != RTEMS_SUCCESSFUL)
      goto error;

    sc = rtems_task_start (bdbuf_cache.read_ahead_task,
                           rtems_bdbuf_read_ahead_task,
                           0);
    if (sc != RTEMS_SUCCESSFUL)
      goto error;
  }

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;

error:

  /* Undo everything created so far; task ids of zero mean "never created". */
  if (bdbuf_cache.read_ahead_task != 0)
    rtems_task_delete (bdbuf_cache.read_ahead_task);

  if (bdbuf_cache.swapout != 0)
    rtems_task_delete (bdbuf_cache.swapout);

  if (bdbuf_cache.swapout_workers)
  {
    char   *worker_current = (char *) bdbuf_cache.swapout_workers;
    size_t  worker_size = rtems_bdbuf_swapout_worker_size ();
    size_t  w;

    for (w = 0;
         w < bdbuf_config.swapout_workers;
         w++, worker_current += worker_size)
    {
      rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;

      if (worker->id != 0) {
        rtems_task_delete (worker->id);
      }
    }
  }

  free (bdbuf_cache.buffers);
  free (bdbuf_cache.groups);
  free (bdbuf_cache.bds);
  free (bdbuf_cache.swapout_transfer);
  free (bdbuf_cache.swapout_workers);

  rtems_bdbuf_unlock_cache ();

  return RTEMS_UNSATISFIED;
}
1501
/**
 * pthread_once() callback: run the cache initialisation and record its
 * status for all current and future rtems_bdbuf_init() callers.
 */
static void
rtems_bdbuf_init_once (void)
{
  bdbuf_cache.init_status = rtems_bdbuf_do_init();
}
1507
1508rtems_status_code
1509rtems_bdbuf_init (void)
1510{
1511  int eno;
1512
1513  eno = pthread_once (&bdbuf_cache.once, rtems_bdbuf_init_once);
1514  _Assert (eno == 0);
1515  (void) eno;
1516
1517  return bdbuf_cache.init_status;
1518}
1519
1520static void
1521rtems_bdbuf_wait_for_event (rtems_event_set event)
1522{
1523  rtems_status_code sc = RTEMS_SUCCESSFUL;
1524  rtems_event_set   out = 0;
1525
1526  sc = rtems_event_receive (event,
1527                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1528                            RTEMS_NO_TIMEOUT,
1529                            &out);
1530
1531  if (sc != RTEMS_SUCCESSFUL || out != event)
1532    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_EVNT);
1533}
1534
1535static void
1536rtems_bdbuf_wait_for_transient_event (void)
1537{
1538  rtems_status_code sc = RTEMS_SUCCESSFUL;
1539
1540  sc = rtems_event_transient_receive (RTEMS_WAIT, RTEMS_NO_TIMEOUT);
1541  if (sc != RTEMS_SUCCESSFUL)
1542    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT);
1543}
1544
/**
 * Wait until the buffer can be accessed by the caller.  The cache must be
 * locked.  Returns once the buffer is in MODIFIED, CACHED or EMPTY state;
 * MODIFIED and CACHED buffers are extracted from their current list and a
 * MODIFIED buffer additionally gives up its group reference.  While other
 * users access the buffer or a transfer is in progress, this blocks on the
 * corresponding waiter list.
 */
static void
rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_MODIFIED:
        rtems_bdbuf_group_release (bd);
        /* Fall through */
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_chain_extract_unprotected (&bd->link);
        /* Fall through */
      case RTEMS_BDBUF_STATE_EMPTY:
        return;
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        /* Someone else accesses the buffer: wait for them to finish. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        /* A device transfer is in progress: wait for its completion. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_7);
    }
  }
}
1576
/**
 * Move a modified buffer onto the sync list and wake the swapout task so it
 * gets written out.  The cache must be locked.
 */
static void
rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
  rtems_chain_extract_unprotected (&bd->link);
  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
  rtems_bdbuf_wake_swapper ();
}
1585
1586/**
1587 * @brief Waits until the buffer is ready for recycling.
1588 *
1589 * @retval @c true Buffer is valid and may be recycled.
1590 * @retval @c false Buffer is invalid and has to searched again.
1591 */
1592static bool
1593rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
1594{
1595  while (true)
1596  {
1597    switch (bd->state)
1598    {
1599      case RTEMS_BDBUF_STATE_FREE:
1600        return true;
1601      case RTEMS_BDBUF_STATE_MODIFIED:
1602        rtems_bdbuf_request_sync_for_modified_buffer (bd);
1603        break;
1604      case RTEMS_BDBUF_STATE_CACHED:
1605      case RTEMS_BDBUF_STATE_EMPTY:
1606        if (bd->waiters == 0)
1607          return true;
1608        else
1609        {
1610          /*
1611           * It is essential that we wait here without a special wait count and
1612           * without the group in use.  Otherwise we could trigger a wait ping
1613           * pong with another recycle waiter.  The state of the buffer is
1614           * arbitrary afterwards.
1615           */
1616          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1617          return false;
1618        }
1619      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1620      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1621      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1622      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1623        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1624        break;
1625      case RTEMS_BDBUF_STATE_SYNC:
1626      case RTEMS_BDBUF_STATE_TRANSFER:
1627      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1628        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1629        break;
1630      default:
1631        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_8);
1632    }
1633  }
1634}
1635
/**
 * Wait until any sync or transfer in progress on the buffer has completed.
 * The cache must be locked.  Returns as soon as the buffer is in any
 * non-transfer state.
 */
static void
rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        return;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_9);
    }
  }
}
1661
/**
 * Block until a buffer becomes available.  If there are modified buffers,
 * the swapout task is woken first so flushing them can free buffers while we
 * wait.  The cache must be locked.
 */
static void
rtems_bdbuf_wait_for_buffer (void)
{
  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
    rtems_bdbuf_wake_swapper ();

  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
}
1670
/**
 * Queue a buffer just released from access on the sync list, wake the
 * swapout task and wait until the write-out has completed.  The cache must
 * be locked.
 */
static void
rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);

  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);

  /* Access to this buffer is over: let access waiters re-evaluate it. */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_wait_for_sync_done (bd);

  /*
   * We may have created a cached or empty buffer which may be recycled.
   */
  if (bd->waiters == 0
        && (bd->state == RTEMS_BDBUF_STATE_CACHED
          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
  {
    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
    {
      rtems_bdbuf_remove_from_tree (bd);
      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
    }
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
  }
}
1699
1700static rtems_bdbuf_buffer *
1701rtems_bdbuf_get_buffer_for_read_ahead (rtems_disk_device *dd,
1702                                       rtems_blkdev_bnum  block)
1703{
1704  rtems_bdbuf_buffer *bd = NULL;
1705
1706  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1707
1708  if (bd == NULL)
1709  {
1710    bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);
1711
1712    if (bd != NULL)
1713      rtems_bdbuf_group_obtain (bd);
1714  }
1715  else
1716    /*
1717     * The buffer is in the cache.  So it is already available or in use, and
1718     * thus no need for a read ahead.
1719     */
1720    bd = NULL;
1721
1722  return bd;
1723}
1724
/**
 * Obtain a buffer for the given device and media block for exclusive access.
 * The cache must be locked.  Loops until either the block is found in the
 * AVL tree with a matching group layout, or an LRU buffer has been recycled
 * for it; blocks while no buffer is available.  On return the buffer is
 * ready for access and holds a group reference.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_for_access (rtems_disk_device *dd,
                                   rtems_blkdev_bnum  block)
{
  rtems_bdbuf_buffer *bd = NULL;

  do
  {
    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);

    if (bd != NULL)
    {
      if (bd->group->bds_per_group != dd->bds_per_group)
      {
        /*
         * The buffer exists but with the wrong group layout: free it and
         * retry so it is re-created with the layout of this device.
         */
        if (rtems_bdbuf_wait_for_recycle (bd))
        {
          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
        }
        bd = NULL;
      }
    }
    else
    {
      bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);

      if (bd == NULL)
        rtems_bdbuf_wait_for_buffer ();
    }
  }
  while (bd == NULL);

  rtems_bdbuf_wait_for_access (bd);
  rtems_bdbuf_group_obtain (bd);

  return bd;
}
1763
1764static rtems_status_code
1765rtems_bdbuf_get_media_block (const rtems_disk_device *dd,
1766                             rtems_blkdev_bnum        block,
1767                             rtems_blkdev_bnum       *media_block_ptr)
1768{
1769  rtems_status_code sc = RTEMS_SUCCESSFUL;
1770
1771  if (block < dd->block_count)
1772  {
1773    /*
1774     * Compute the media block number. Drivers work with media block number not
1775     * the block number a BD may have as this depends on the block size set by
1776     * the user.
1777     */
1778    *media_block_ptr = rtems_bdbuf_media_block (dd, block) + dd->start;
1779  }
1780  else
1781  {
1782    sc = RTEMS_INVALID_ID;
1783  }
1784
1785  return sc;
1786}
1787
/*
 * Get a buffer for the block without reading its contents from the media.
 * On success the returned buffer is in one of the ACCESS states and must be
 * released with rtems_bdbuf_release() or rtems_bdbuf_release_modified().
 */
rtems_status_code
rtems_bdbuf_get (rtems_disk_device   *dd,
                 rtems_blkdev_bnum    block,
                 rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code   sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block;

  rtems_bdbuf_lock_cache ();

  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    /*
     * Print the block index relative to the physical disk.
     */
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);

    /* Move the buffer into the matching access state. */
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /*
         * To get a modified buffer could be considered a bug in the caller
         * because you should not be getting an already modified buffer but
         * user may have modified a byte in a block then decided to seek the
         * start and write the whole block and the file system will have no
         * record of this so just gets the block to fill.
         */
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_2);
        break;
    }

    if (rtems_bdbuf_tracer)
    {
      rtems_bdbuf_show_users ("get", bd);
      rtems_bdbuf_show_usage ();
    }
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
1847
1848/**
1849 * Call back handler called by the low level driver when the transfer has
1850 * completed. This function may be invoked from interrupt handler.
1851 *
1852 * @param arg Arbitrary argument specified in block device request
1853 *            structure (in this case - pointer to the appropriate
1854 *            block device request structure).
1855 * @param status I/O completion status
1856 */
1857static void
1858rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status)
1859{
1860  req->status = status;
1861
1862  rtems_event_transient_send (req->io_task);
1863}
1864
/**
 * Execute a block device transfer request and process its completion:
 * update the device statistics, release the group reference of each buffer,
 * move successfully transferred buffers to the cached state and discard the
 * rest, then wake the relevant waiters.
 *
 * The cache lock state on return equals the state on entry: the lock is
 * dropped around the driver call and re-taken for the post-processing.
 *
 * @param dd The disk device the request is for.
 * @param req The prepared transfer request.
 * @param cache_locked Whether the caller holds the cache lock.
 * @retval RTEMS_SUCCESSFUL Transfer completed successfully.
 * @retval RTEMS_UNSATISFIED Passed through from the driver status.
 * @retval RTEMS_IO_ERROR Any other driver failure.
 */
static rtems_status_code
rtems_bdbuf_execute_transfer_request (rtems_disk_device    *dd,
                                      rtems_blkdev_request *req,
                                      bool                  cache_locked)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  uint32_t transfer_index = 0;
  bool wake_transfer_waiters = false;
  bool wake_buffer_waiters = false;

  if (cache_locked)
    rtems_bdbuf_unlock_cache ();

  /* The return value will be ignored for transfer requests */
  dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);

  /* Wait for transfer request completion */
  rtems_bdbuf_wait_for_transient_event ();
  sc = req->status;

  rtems_bdbuf_lock_cache ();

  /* Statistics */
  if (req->req == RTEMS_BLKDEV_REQ_READ)
  {
    dd->stats.read_blocks += req->bufnum;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.read_errors;
  }
  else
  {
    dd->stats.write_blocks += req->bufnum;
    ++dd->stats.write_transfers;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.write_errors;
  }

  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
  {
    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
    bool waiters = bd->waiters;

    if (waiters)
      wake_transfer_waiters = true;
    else
      wake_buffer_waiters = true;

    rtems_bdbuf_group_release (bd);

    /* Only untouched TRANSFER buffers of a successful request stay valid. */
    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
    else
      rtems_bdbuf_discard_buffer (bd);

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("transfer", bd);
  }

  if (wake_transfer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);

  if (wake_buffer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  if (!cache_locked)
    rtems_bdbuf_unlock_cache ();

  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
    return sc;
  else
    return RTEMS_IO_ERROR;
}
1937
/**
 * Build and execute a read request starting at the given buffer.  Up to
 * transfer_count consecutive media blocks are read; additional blocks are
 * added only while recyclable read-ahead buffers are available, so the
 * actual transfer may be shorter.  The cache must be locked.
 *
 * @param dd The disk device to read from.
 * @param bd The first buffer of the transfer, already set up for the
 *           starting media block.
 * @param transfer_count Maximum number of blocks to read.
 * @return Status of the executed transfer request.
 */
static rtems_status_code
rtems_bdbuf_execute_read_request (rtems_disk_device  *dd,
                                  rtems_bdbuf_buffer *bd,
                                  uint32_t            transfer_count)
{
  rtems_blkdev_request *req = NULL;
  rtems_blkdev_bnum media_block = bd->block;
  uint32_t media_blocks_per_block = dd->media_blocks_per_block;
  uint32_t block_size = dd->block_size;
  uint32_t transfer_index = 1;

  /*
   * TODO: This type of request structure is wrong and should be removed.
   */
#define bdbuf_alloc(size) __builtin_alloca (size)

  /* Stack allocation; its size is bounded in rtems_bdbuf_do_init(). */
  req = bdbuf_alloc (rtems_bdbuf_read_request_size (transfer_count));

  req->req = RTEMS_BLKDEV_REQ_READ;
  req->done = rtems_bdbuf_transfer_done;
  req->io_task = rtems_task_self ();
  req->bufnum = 0;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

  req->bufs [0].user   = bd;
  req->bufs [0].block  = media_block;
  req->bufs [0].length = block_size;
  req->bufs [0].buffer = bd->buffer;

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_users ("read", bd);

  /* Extend the transfer with read-ahead buffers for consecutive blocks. */
  while (transfer_index < transfer_count)
  {
    media_block += media_blocks_per_block;

    bd = rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

    if (bd == NULL)
      break;

    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

    req->bufs [transfer_index].user   = bd;
    req->bufs [transfer_index].block  = media_block;
    req->bufs [transfer_index].length = block_size;
    req->bufs [transfer_index].buffer = bd->buffer;

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("read", bd);

    ++transfer_index;
  }

  req->bufnum = transfer_index;

  return rtems_bdbuf_execute_transfer_request (dd, req, true);
}
1997
/**
 * A device's read ahead is active while its node is on the read-ahead chain.
 */
static bool
rtems_bdbuf_is_read_ahead_active (const rtems_disk_device *dd)
{
  return !rtems_chain_is_node_off_chain (&dd->read_ahead.node);
}
2003
/**
 * Remove the device from the read-ahead chain, if queued, and mark its node
 * as off-chain so rtems_bdbuf_is_read_ahead_active() reports inactive.
 */
static void
rtems_bdbuf_read_ahead_cancel (rtems_disk_device *dd)
{
  if (rtems_bdbuf_is_read_ahead_active (dd))
  {
    rtems_chain_extract_unprotected (&dd->read_ahead.node);
    rtems_chain_set_off_chain (&dd->read_ahead.node);
  }
}
2013
/**
 * Cancel any pending read ahead for the device and disarm its trigger.
 */
static void
rtems_bdbuf_read_ahead_reset (rtems_disk_device *dd)
{
  rtems_bdbuf_read_ahead_cancel (dd);
  dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
}
2020
/**
 * Append the device to the read-ahead chain.  If the chain was empty, the
 * read-ahead task is woken first; an empty chain means the task may be
 * waiting for work.
 */
static void
rtems_bdbuf_read_ahead_add_to_chain (rtems_disk_device *dd)
{
  rtems_status_code sc;
  rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;

  if (rtems_chain_is_empty (chain))
  {
    sc = rtems_event_send (bdbuf_cache.read_ahead_task,
                           RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
    if (sc != RTEMS_SUCCESSFUL)
      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RA_WAKE_UP);
  }

  rtems_chain_append_unprotected (chain, &dd->read_ahead.node);
}
2037
2038static void
2039rtems_bdbuf_check_read_ahead_trigger (rtems_disk_device *dd,
2040                                      rtems_blkdev_bnum  block)
2041{
2042  if (bdbuf_cache.read_ahead_task != 0
2043      && dd->read_ahead.trigger == block
2044      && !rtems_bdbuf_is_read_ahead_active (dd))
2045  {
2046    dd->read_ahead.nr_blocks = RTEMS_DISK_READ_AHEAD_SIZE_AUTO;
2047    rtems_bdbuf_read_ahead_add_to_chain(dd);
2048  }
2049}
2050
2051static void
2052rtems_bdbuf_set_read_ahead_trigger (rtems_disk_device *dd,
2053                                    rtems_blkdev_bnum  block)
2054{
2055  if (dd->read_ahead.trigger != block)
2056  {
2057    rtems_bdbuf_read_ahead_cancel (dd);
2058    dd->read_ahead.trigger = block + 1;
2059    dd->read_ahead.next = block + 2;
2060  }
2061}
2062
/*
 * Get a buffer for the block with valid contents: cached and modified
 * buffers are returned directly, empty buffers are filled from the media.
 * On success the returned buffer is in an ACCESS state and must be released
 * with rtems_bdbuf_release() or rtems_bdbuf_release_modified().
 */
rtems_status_code
rtems_bdbuf_read (rtems_disk_device   *dd,
                  rtems_blkdev_bnum    block,
                  rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code     sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer   *bd = NULL;
  rtems_blkdev_bnum     media_block;

  rtems_bdbuf_lock_cache ();

  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        /* A miss: arm the read-ahead trigger and read from the media. */
        ++dd->stats.read_misses;
        rtems_bdbuf_set_read_ahead_trigger (dd, block);
        sc = rtems_bdbuf_execute_read_request (dd, bd, 1);
        if (sc == RTEMS_SUCCESSFUL)
        {
          rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
          rtems_chain_extract_unprotected (&bd->link);
          rtems_bdbuf_group_obtain (bd);
        }
        else
        {
          /* The transfer failed; the buffer was discarded. */
          bd = NULL;
        }
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_4);
        break;
    }

    rtems_bdbuf_check_read_ahead_trigger (dd, block);
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
2121
2122void
2123rtems_bdbuf_peek (rtems_disk_device *dd,
2124                  rtems_blkdev_bnum block,
2125                  uint32_t nr_blocks)
2126{
2127  rtems_bdbuf_lock_cache ();
2128
2129  if (bdbuf_cache.read_ahead_enabled && nr_blocks > 0)
2130  {
2131    rtems_bdbuf_read_ahead_reset(dd);
2132    dd->read_ahead.next = block;
2133    dd->read_ahead.nr_blocks = nr_blocks;
2134    rtems_bdbuf_read_ahead_add_to_chain(dd);
2135  }
2136
2137  rtems_bdbuf_unlock_cache ();
2138}
2139
/**
 * Validate a buffer descriptor pointer and, on success, obtain the cache
 * lock.  Common prologue of the release/sync entry points.
 *
 * @param bd The buffer descriptor supplied by the caller; may be NULL.
 * @param kind Short label ("release", "sync", ...) used only for tracing.
 *
 * @retval RTEMS_INVALID_ADDRESS @a bd is NULL; the cache is NOT locked.
 * @retval RTEMS_SUCCESSFUL The cache lock is held on return.
 */
static rtems_status_code
rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
{
  if (bd == NULL)
    return RTEMS_INVALID_ADDRESS;
  if (rtems_bdbuf_tracer)
  {
    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
    rtems_bdbuf_show_users (kind, bd);
  }
  rtems_bdbuf_lock_cache();

  return RTEMS_SUCCESSFUL;
}
2154
2155rtems_status_code
2156rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2157{
2158  rtems_status_code sc = RTEMS_SUCCESSFUL;
2159
2160  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2161  if (sc != RTEMS_SUCCESSFUL)
2162    return sc;
2163
2164  switch (bd->state)
2165  {
2166    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2167      rtems_bdbuf_add_to_lru_list_after_access (bd);
2168      break;
2169    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2170    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2171      rtems_bdbuf_discard_buffer_after_access (bd);
2172      break;
2173    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2174      rtems_bdbuf_add_to_modified_list_after_access (bd);
2175      break;
2176    default:
2177      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_0);
2178      break;
2179  }
2180
2181  if (rtems_bdbuf_tracer)
2182    rtems_bdbuf_show_usage ();
2183
2184  rtems_bdbuf_unlock_cache ();
2185
2186  return RTEMS_SUCCESSFUL;
2187}
2188
2189rtems_status_code
2190rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2191{
2192  rtems_status_code sc = RTEMS_SUCCESSFUL;
2193
2194  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2195  if (sc != RTEMS_SUCCESSFUL)
2196    return sc;
2197
2198  switch (bd->state)
2199  {
2200    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2201    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2202    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2203      rtems_bdbuf_add_to_modified_list_after_access (bd);
2204      break;
2205    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2206      rtems_bdbuf_discard_buffer_after_access (bd);
2207      break;
2208    default:
2209      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_6);
2210      break;
2211  }
2212
2213  if (rtems_bdbuf_tracer)
2214    rtems_bdbuf_show_usage ();
2215
2216  rtems_bdbuf_unlock_cache ();
2217
2218  return RTEMS_SUCCESSFUL;
2219}
2220
2221rtems_status_code
2222rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
2223{
2224  rtems_status_code sc = RTEMS_SUCCESSFUL;
2225
2226  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2227  if (sc != RTEMS_SUCCESSFUL)
2228    return sc;
2229
2230  switch (bd->state)
2231  {
2232    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2233    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2234    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2235      rtems_bdbuf_sync_after_access (bd);
2236      break;
2237    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2238      rtems_bdbuf_discard_buffer_after_access (bd);
2239      break;
2240    default:
2241      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_5);
2242      break;
2243  }
2244
2245  if (rtems_bdbuf_tracer)
2246    rtems_bdbuf_show_usage ();
2247
2248  rtems_bdbuf_unlock_cache ();
2249
2250  return RTEMS_SUCCESSFUL;
2251}
2252
/**
 * Synchronize all modified buffers of a disk device with the media.  The
 * actual write out is performed by the swapout task; this function only
 * flags the sync request, wakes the swapper and blocks on a transient event
 * until the swapout task reports that no more buffers for the device are
 * queued for sync.
 *
 * @param dd The disk device to synchronize.
 *
 * @retval RTEMS_SUCCESSFUL Always; transfer errors are handled on the
 *         swapout path.
 */
rtems_status_code
rtems_bdbuf_syncdev (rtems_disk_device *dd)
{
  if (rtems_bdbuf_tracer)
    printf ("bdbuf:syncdev: %08x\n", (unsigned) dd->dev);

  /*
   * Take the sync lock before locking the cache. Once we have the sync lock we
   * can lock the cache. If another thread has the sync lock it will cause this
   * thread to block until it owns the sync lock then it can own the cache. The
   * sync lock can only be obtained with the cache unlocked.
   */
  rtems_bdbuf_lock_sync ();
  rtems_bdbuf_lock_cache ();

  /*
   * Set the cache to have a sync active for a specific device and let the swap
   * out task know the id of the requester to wake when done.
   *
   * The swap out task will negate the sync active flag when no more buffers
   * for the device are held on the "modified for sync" queues.
   */
  bdbuf_cache.sync_active    = true;
  bdbuf_cache.sync_requester = rtems_task_self ();
  bdbuf_cache.sync_device    = dd;

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_unlock_cache ();
  rtems_bdbuf_wait_for_transient_event ();
  rtems_bdbuf_unlock_sync ();

  return RTEMS_SUCCESSFUL;
}
2286
2287/**
2288 * Swapout transfer to the driver. The driver will break this I/O into groups
2289 * of consecutive write requests is multiple consecutive buffers are required
2290 * by the driver. The cache is not locked.
2291 *
2292 * @param transfer The transfer transaction.
2293 */
2294static void
2295rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2296{
2297  rtems_chain_node *node;
2298
2299  if (rtems_bdbuf_tracer)
2300    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dd->dev);
2301
2302  /*
2303   * If there are buffers to transfer to the media transfer them.
2304   */
2305  if (!rtems_chain_is_empty (&transfer->bds))
2306  {
2307    /*
2308     * The last block number used when the driver only supports
2309     * continuous blocks in a single request.
2310     */
2311    uint32_t last_block = 0;
2312
2313    rtems_disk_device *dd = transfer->dd;
2314    uint32_t media_blocks_per_block = dd->media_blocks_per_block;
2315    bool need_continuous_blocks =
2316      (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) != 0;
2317
2318    /*
2319     * Take as many buffers as configured and pass to the driver. Note, the
2320     * API to the drivers has an array of buffers and if a chain was passed
2321     * we could have just passed the list. If the driver API is updated it
2322     * should be possible to make this change with little effect in this
2323     * code. The array that is passed is broken in design and should be
2324     * removed. Merging members of a struct into the first member is
2325     * trouble waiting to happen.
2326     */
2327    transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
2328    transfer->write_req.bufnum = 0;
2329
2330    while ((node = rtems_chain_get_unprotected(&transfer->bds)) != NULL)
2331    {
2332      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2333      bool                write = false;
2334
2335      /*
2336       * If the device only accepts sequential buffers and this is not the
2337       * first buffer (the first is always sequential, and the buffer is not
2338       * sequential then put the buffer back on the transfer chain and write
2339       * the committed buffers.
2340       */
2341
2342      if (rtems_bdbuf_tracer)
2343        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2344                bd->block, transfer->write_req.bufnum,
2345                need_continuous_blocks ? "MULTI" : "SCAT");
2346
2347      if (need_continuous_blocks && transfer->write_req.bufnum &&
2348          bd->block != last_block + media_blocks_per_block)
2349      {
2350        rtems_chain_prepend_unprotected (&transfer->bds, &bd->link);
2351        write = true;
2352      }
2353      else
2354      {
2355        rtems_blkdev_sg_buffer* buf;
2356        buf = &transfer->write_req.bufs[transfer->write_req.bufnum];
2357        transfer->write_req.bufnum++;
2358        buf->user   = bd;
2359        buf->block  = bd->block;
2360        buf->length = dd->block_size;
2361        buf->buffer = bd->buffer;
2362        last_block  = bd->block;
2363      }
2364
2365      /*
2366       * Perform the transfer if there are no more buffers, or the transfer
2367       * size has reached the configured max. value.
2368       */
2369
2370      if (rtems_chain_is_empty (&transfer->bds) ||
2371          (transfer->write_req.bufnum >= bdbuf_config.max_write_blocks))
2372        write = true;
2373
2374      if (write)
2375      {
2376        rtems_bdbuf_execute_transfer_request (dd, &transfer->write_req, false);
2377
2378        transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
2379        transfer->write_req.bufnum = 0;
2380      }
2381    }
2382
2383    /*
2384     * If sync'ing and the deivce is capability of handling a sync IO control
2385     * call perform the call.
2386     */
2387    if (transfer->syncing &&
2388        (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
2389    {
2390      /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
2391      /* How should the error be handled ? */
2392    }
2393  }
2394}
2395
2396/**
2397 * Process the modified list of buffers. There is a sync or modified list that
2398 * needs to be handled so we have a common function to do the work.
2399 *
2400 * @param dd_ptr Pointer to the device to handle. If BDBUF_INVALID_DEV no
2401 * device is selected so select the device of the first buffer to be written to
2402 * disk.
2403 * @param chain The modified chain to process.
2404 * @param transfer The chain to append buffers to be written too.
2405 * @param sync_active If true this is a sync operation so expire all timers.
2406 * @param update_timers If true update the timers.
2407 * @param timer_delta It update_timers is true update the timers by this
2408 *                    amount.
2409 */
2410static void
2411rtems_bdbuf_swapout_modified_processing (rtems_disk_device  **dd_ptr,
2412                                         rtems_chain_control* chain,
2413                                         rtems_chain_control* transfer,
2414                                         bool                 sync_active,
2415                                         bool                 update_timers,
2416                                         uint32_t             timer_delta)
2417{
2418  if (!rtems_chain_is_empty (chain))
2419  {
2420    rtems_chain_node* node = rtems_chain_head (chain);
2421    bool              sync_all;
2422
2423    node = node->next;
2424
2425    /*
2426     * A sync active with no valid dev means sync all.
2427     */
2428    if (sync_active && (*dd_ptr == BDBUF_INVALID_DEV))
2429      sync_all = true;
2430    else
2431      sync_all = false;
2432
2433    while (!rtems_chain_is_tail (chain, node))
2434    {
2435      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2436
2437      /*
2438       * Check if the buffer's hold timer has reached 0. If a sync is active
2439       * or someone waits for a buffer written force all the timers to 0.
2440       *
2441       * @note Lots of sync requests will skew this timer. It should be based
2442       *       on TOD to be accurate. Does it matter ?
2443       */
2444      if (sync_all || (sync_active && (*dd_ptr == bd->dd))
2445          || rtems_bdbuf_has_buffer_waiters ())
2446        bd->hold_timer = 0;
2447
2448      if (bd->hold_timer)
2449      {
2450        if (update_timers)
2451        {
2452          if (bd->hold_timer > timer_delta)
2453            bd->hold_timer -= timer_delta;
2454          else
2455            bd->hold_timer = 0;
2456        }
2457
2458        if (bd->hold_timer)
2459        {
2460          node = node->next;
2461          continue;
2462        }
2463      }
2464
2465      /*
2466       * This assumes we can set it to BDBUF_INVALID_DEV which is just an
2467       * assumption. Cannot use the transfer list being empty the sync dev
2468       * calls sets the dev to use.
2469       */
2470      if (*dd_ptr == BDBUF_INVALID_DEV)
2471        *dd_ptr = bd->dd;
2472
2473      if (bd->dd == *dd_ptr)
2474      {
2475        rtems_chain_node* next_node = node->next;
2476        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2477
2478        /*
2479         * The blocks on the transfer list are sorted in block order. This
2480         * means multi-block transfers for drivers that require consecutive
2481         * blocks perform better with sorted blocks and for real disks it may
2482         * help lower head movement.
2483         */
2484
2485        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2486
2487        rtems_chain_extract_unprotected (node);
2488
2489        tnode = tnode->previous;
2490
2491        while (node && !rtems_chain_is_head (transfer, tnode))
2492        {
2493          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2494
2495          if (bd->block > tbd->block)
2496          {
2497            rtems_chain_insert_unprotected (tnode, node);
2498            node = NULL;
2499          }
2500          else
2501            tnode = tnode->previous;
2502        }
2503
2504        if (node)
2505          rtems_chain_prepend_unprotected (transfer, node);
2506
2507        node = next_node;
2508      }
2509      else
2510      {
2511        node = node->next;
2512      }
2513    }
2514  }
2515}
2516
2517/**
2518 * Process the cache's modified buffers. Check the sync list first then the
2519 * modified list extracting the buffers suitable to be written to disk. We have
2520 * a device at a time. The task level loop will repeat this operation while
2521 * there are buffers to be written. If the transfer fails place the buffers
2522 * back on the modified list and try again later. The cache is unlocked while
2523 * the buffers are being written to disk.
2524 *
2525 * @param timer_delta It update_timers is true update the timers by this
2526 *                    amount.
2527 * @param update_timers If true update the timers.
2528 * @param transfer The transfer transaction data.
2529 *
2530 * @retval true Buffers where written to disk so scan again.
2531 * @retval false No buffers where written to disk.
2532 */
2533static bool
2534rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2535                                bool                          update_timers,
2536                                rtems_bdbuf_swapout_transfer* transfer)
2537{
2538  rtems_bdbuf_swapout_worker* worker;
2539  bool                        transfered_buffers = false;
2540  bool                        sync_active;
2541
2542  rtems_bdbuf_lock_cache ();
2543
2544  /*
2545   * To set this to true you need the cache and the sync lock.
2546   */
2547  sync_active = bdbuf_cache.sync_active;
2548
2549  /*
2550   * If a sync is active do not use a worker because the current code does not
2551   * cleaning up after. We need to know the buffers have been written when
2552   * syncing to release sync lock and currently worker threads do not return to
2553   * here. We do not know the worker is the last in a sequence of sync writes
2554   * until after we have it running so we do not know to tell it to release the
2555   * lock. The simplest solution is to get the main swap out task perform all
2556   * sync operations.
2557   */
2558  if (sync_active)
2559    worker = NULL;
2560  else
2561  {
2562    worker = (rtems_bdbuf_swapout_worker*)
2563      rtems_chain_get_unprotected (&bdbuf_cache.swapout_free_workers);
2564    if (worker)
2565      transfer = &worker->transfer;
2566  }
2567
2568  rtems_chain_initialize_empty (&transfer->bds);
2569  transfer->dd = BDBUF_INVALID_DEV;
2570  transfer->syncing = sync_active;
2571
2572  /*
2573   * When the sync is for a device limit the sync to that device. If the sync
2574   * is for a buffer handle process the devices in the order on the sync
2575   * list. This means the dev is BDBUF_INVALID_DEV.
2576   */
2577  if (sync_active)
2578    transfer->dd = bdbuf_cache.sync_device;
2579
2580  /*
2581   * If we have any buffers in the sync queue move them to the modified
2582   * list. The first sync buffer will select the device we use.
2583   */
2584  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2585                                           &bdbuf_cache.sync,
2586                                           &transfer->bds,
2587                                           true, false,
2588                                           timer_delta);
2589
2590  /*
2591   * Process the cache's modified list.
2592   */
2593  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2594                                           &bdbuf_cache.modified,
2595                                           &transfer->bds,
2596                                           sync_active,
2597                                           update_timers,
2598                                           timer_delta);
2599
2600  /*
2601   * We have all the buffers that have been modified for this device so the
2602   * cache can be unlocked because the state of each buffer has been set to
2603   * TRANSFER.
2604   */
2605  rtems_bdbuf_unlock_cache ();
2606
2607  /*
2608   * If there are buffers to transfer to the media transfer them.
2609   */
2610  if (!rtems_chain_is_empty (&transfer->bds))
2611  {
2612    if (worker)
2613    {
2614      rtems_status_code sc = rtems_event_send (worker->id,
2615                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2616      if (sc != RTEMS_SUCCESSFUL)
2617        rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_2);
2618    }
2619    else
2620    {
2621      rtems_bdbuf_swapout_write (transfer);
2622    }
2623
2624    transfered_buffers = true;
2625  }
2626
2627  if (sync_active && !transfered_buffers)
2628  {
2629    rtems_id sync_requester;
2630    rtems_bdbuf_lock_cache ();
2631    sync_requester = bdbuf_cache.sync_requester;
2632    bdbuf_cache.sync_active = false;
2633    bdbuf_cache.sync_requester = 0;
2634    rtems_bdbuf_unlock_cache ();
2635    if (sync_requester)
2636      rtems_event_transient_send (sync_requester);
2637  }
2638
2639  return transfered_buffers;
2640}
2641
2642/**
2643 * The swapout worker thread body.
2644 *
2645 * @param arg A pointer to the worker thread's private data.
2646 * @return rtems_task Not used.
2647 */
2648static rtems_task
2649rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2650{
2651  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2652
2653  while (worker->enabled)
2654  {
2655    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2656
2657    rtems_bdbuf_swapout_write (&worker->transfer);
2658
2659    rtems_bdbuf_lock_cache ();
2660
2661    rtems_chain_initialize_empty (&worker->transfer.bds);
2662    worker->transfer.dd = BDBUF_INVALID_DEV;
2663
2664    rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);
2665
2666    rtems_bdbuf_unlock_cache ();
2667  }
2668
2669  free (worker);
2670
2671  rtems_task_exit();
2672}
2673
2674/**
2675 * Close the swapout worker threads.
2676 */
2677static void
2678rtems_bdbuf_swapout_workers_close (void)
2679{
2680  rtems_chain_node* node;
2681
2682  rtems_bdbuf_lock_cache ();
2683
2684  node = rtems_chain_first (&bdbuf_cache.swapout_free_workers);
2685  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_free_workers, node))
2686  {
2687    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2688    worker->enabled = false;
2689    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2690    node = rtems_chain_next (node);
2691  }
2692
2693  rtems_bdbuf_unlock_cache ();
2694}
2695
2696/**
2697 * Body of task which takes care on flushing modified buffers to the disk.
2698 *
2699 * @param arg A pointer to the global cache data. Use the global variable and
2700 *            not this.
2701 * @return rtems_task Not used.
2702 */
2703static rtems_task
2704rtems_bdbuf_swapout_task (rtems_task_argument arg)
2705{
2706  rtems_bdbuf_swapout_transfer* transfer = (rtems_bdbuf_swapout_transfer *) arg;
2707  uint32_t                      period_in_ticks;
2708  const uint32_t                period_in_msecs = bdbuf_config.swapout_period;
2709  uint32_t                      timer_delta;
2710
2711  /*
2712   * Localise the period.
2713   */
2714  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2715
2716  /*
2717   * This is temporary. Needs to be changed to use the real time clock.
2718   */
2719  timer_delta = period_in_msecs;
2720
2721  while (bdbuf_cache.swapout_enabled)
2722  {
2723    rtems_event_set   out;
2724    rtems_status_code sc;
2725
2726    /*
2727     * Only update the timers once in the processing cycle.
2728     */
2729    bool update_timers = true;
2730
2731    /*
2732     * If we write buffers to any disk perform a check again. We only write a
2733     * single device at a time and the cache may have more than one device's
2734     * buffers modified waiting to be written.
2735     */
2736    bool transfered_buffers;
2737
2738    do
2739    {
2740      transfered_buffers = false;
2741
2742      /*
2743       * Extact all the buffers we find for a specific device. The device is
2744       * the first one we find on a modified list. Process the sync queue of
2745       * buffers first.
2746       */
2747      if (rtems_bdbuf_swapout_processing (timer_delta,
2748                                          update_timers,
2749                                          transfer))
2750      {
2751        transfered_buffers = true;
2752      }
2753
2754      /*
2755       * Only update the timers once.
2756       */
2757      update_timers = false;
2758    }
2759    while (transfered_buffers);
2760
2761    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2762                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2763                              period_in_ticks,
2764                              &out);
2765
2766    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2767      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SWAPOUT_RE);
2768  }
2769
2770  rtems_bdbuf_swapout_workers_close ();
2771
2772  free (transfer);
2773
2774  rtems_task_exit();
2775}
2776
2777static void
2778rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2779{
2780  bool wake_buffer_waiters = false;
2781  rtems_chain_node *node = NULL;
2782
2783  while ((node = rtems_chain_get_unprotected (purge_list)) != NULL)
2784  {
2785    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
2786
2787    if (bd->waiters == 0)
2788      wake_buffer_waiters = true;
2789
2790    rtems_bdbuf_discard_buffer (bd);
2791  }
2792
2793  if (wake_buffer_waiters)
2794    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2795}
2796
/**
 * Gather all buffers of a disk device from the cache's AVL tree onto a purge
 * list.  The tree is walked iteratively in pre-order using an explicit stack
 * of parent pointers (bounded by RTEMS_BDBUF_AVL_MAX_HEIGHT, no recursion).
 * Idle buffers are moved to the purge list directly; buffers currently
 * accessed or in transfer are only marked purged so their owner discards
 * them later.  The caller must hold the cache lock.
 *
 * @param purge_list The list to append the buffers to purge to.
 * @param dd The disk device whose buffers are purged.
 */
static void
rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
                              const rtems_disk_device *dd)
{
  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer **prev = stack;
  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;

  /* Stack sentinel: a NULL parent marks the bottom of the stack. */
  *prev = NULL;

  while (cur != NULL)
  {
    if (cur->dd == dd)
    {
      switch (cur->state)
      {
        case RTEMS_BDBUF_STATE_FREE:
        case RTEMS_BDBUF_STATE_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
          /* Already free or already marked purged: nothing to do. */
          break;
        case RTEMS_BDBUF_STATE_SYNC:
          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
          /* Fall through */
        case RTEMS_BDBUF_STATE_MODIFIED:
          rtems_bdbuf_group_release (cur);
          /* Fall through */
        case RTEMS_BDBUF_STATE_CACHED:
          /* Idle buffer: move it from its current queue to the purge list. */
          rtems_chain_extract_unprotected (&cur->link);
          rtems_chain_append_unprotected (purge_list, &cur->link);
          break;
        case RTEMS_BDBUF_STATE_TRANSFER:
          /* In flight: mark purged so the transfer completion discards it. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
          break;
        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
          /* Accessed by a client: mark purged so the release discards it. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
          break;
        default:
          rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_STATE_11);
      }
    }

    if (cur->avl.left != NULL)
    {
      /* Left */
      ++prev;
      *prev = cur;
      cur = cur->avl.left;
    }
    else if (cur->avl.right != NULL)
    {
      /* Right */
      ++prev;
      *prev = cur;
      cur = cur->avl.right;
    }
    else
    {
      /* Leaf: climb back up past fully visited subtrees. */
      while (*prev != NULL
             && (cur == (*prev)->avl.right || (*prev)->avl.right == NULL))
      {
        /* Up */
        cur = *prev;
        --prev;
      }
      if (*prev != NULL)
        /* Right */
        cur = (*prev)->avl.right;
      else
        /* Finished */
        cur = NULL;
    }
  }
}
2873
/**
 * Purge all buffers of a disk device.  Resets any pending read-ahead for
 * the device, collects its buffers from the AVL tree and discards them.
 * The caller must hold the cache lock.
 *
 * @param dd The disk device to purge.
 */
static void
rtems_bdbuf_do_purge_dev (rtems_disk_device *dd)
{
  rtems_chain_control purge_list;

  rtems_chain_initialize_empty (&purge_list);
  rtems_bdbuf_read_ahead_reset (dd);
  rtems_bdbuf_gather_for_purge (&purge_list, dd);
  rtems_bdbuf_purge_list (&purge_list);
}
2884
/**
 * Purge all buffers of a disk device.  Public wrapper that takes the cache
 * lock around the actual purge.
 *
 * @param dd The disk device to purge.
 */
void
rtems_bdbuf_purge_dev (rtems_disk_device *dd)
{
  rtems_bdbuf_lock_cache ();
  rtems_bdbuf_do_purge_dev (dd);
  rtems_bdbuf_unlock_cache ();
}
2892
2893rtems_status_code
2894rtems_bdbuf_set_block_size (rtems_disk_device *dd,
2895                            uint32_t           block_size,
2896                            bool               sync)
2897{
2898  rtems_status_code sc = RTEMS_SUCCESSFUL;
2899
2900  /*
2901   * We do not care about the synchronization status since we will purge the
2902   * device later.
2903   */
2904  if (sync)
2905    rtems_bdbuf_syncdev (dd);
2906
2907  rtems_bdbuf_lock_cache ();
2908
2909  if (block_size > 0)
2910  {
2911    size_t bds_per_group = rtems_bdbuf_bds_per_group (block_size);
2912
2913    if (bds_per_group != 0)
2914    {
2915      int block_to_media_block_shift = 0;
2916      uint32_t media_blocks_per_block = block_size / dd->media_block_size;
2917      uint32_t one = 1;
2918
2919      while ((one << block_to_media_block_shift) < media_blocks_per_block)
2920      {
2921        ++block_to_media_block_shift;
2922      }
2923
2924      if ((dd->media_block_size << block_to_media_block_shift) != block_size)
2925        block_to_media_block_shift = -1;
2926
2927      dd->block_size = block_size;
2928      dd->block_count = dd->size / media_blocks_per_block;
2929      dd->media_blocks_per_block = media_blocks_per_block;
2930      dd->block_to_media_block_shift = block_to_media_block_shift;
2931      dd->bds_per_group = bds_per_group;
2932
2933      rtems_bdbuf_do_purge_dev (dd);
2934    }
2935    else
2936    {
2937      sc = RTEMS_INVALID_NUMBER;
2938    }
2939  }
2940  else
2941  {
2942    sc = RTEMS_INVALID_NUMBER;
2943  }
2944
2945  rtems_bdbuf_unlock_cache ();
2946
2947  return sc;
2948}
2949
/**
 * Body of the read-ahead task.  Waits for wake-up events and services the
 * chain of disk devices with a scheduled read-ahead, transferring the next
 * range of blocks into the cache.  Automatic read-ahead
 * (RTEMS_DISK_READ_AHEAD_SIZE_AUTO) also arms the next trigger; an explicit
 * peek request uses the requested block count.
 *
 * @param arg Unused.
 */
static rtems_task
rtems_bdbuf_read_ahead_task (rtems_task_argument arg)
{
  rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;

  while (bdbuf_cache.read_ahead_enabled)
  {
    rtems_chain_node *node;

    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
    rtems_bdbuf_lock_cache ();

    while ((node = rtems_chain_get_unprotected (chain)) != NULL)
    {
      rtems_disk_device *dd =
        RTEMS_CONTAINER_OF (node, rtems_disk_device, read_ahead.node);
      rtems_blkdev_bnum block = dd->read_ahead.next;
      rtems_blkdev_bnum media_block = 0;
      rtems_status_code sc =
        rtems_bdbuf_get_media_block (dd, block, &media_block);

      /* Mark the device as no longer queued for read-ahead. */
      rtems_chain_set_off_chain (&dd->read_ahead.node);

      if (sc == RTEMS_SUCCESSFUL)
      {
        rtems_bdbuf_buffer *bd =
          rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

        /* bd == NULL means the block is already cached: nothing to do. */
        if (bd != NULL)
        {
          uint32_t transfer_count = dd->read_ahead.nr_blocks;
          uint32_t blocks_until_end_of_disk = dd->block_count - block;
          uint32_t max_transfer_count = bdbuf_config.max_read_ahead_blocks;

          if (transfer_count == RTEMS_DISK_READ_AHEAD_SIZE_AUTO) {
            /* Automatic mode: read up to the configured maximum and arm the
               trigger for the next read-ahead half way into this range. */
            transfer_count = blocks_until_end_of_disk;

            if (transfer_count >= max_transfer_count)
            {
              transfer_count = max_transfer_count;
              dd->read_ahead.trigger = block + transfer_count / 2;
              dd->read_ahead.next = block + transfer_count;
            }
            else
            {
              /* Reached the end of the disk: stop automatic read-ahead. */
              dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
            }
          } else {
            /* Peek request: clamp the requested count to the end of the disk
               and to the configured maximum. */
            if (transfer_count > blocks_until_end_of_disk) {
              transfer_count = blocks_until_end_of_disk;
            }

            if (transfer_count > max_transfer_count) {
              transfer_count = max_transfer_count;
            }

            ++dd->stats.read_ahead_peeks;
          }

          ++dd->stats.read_ahead_transfers;
          rtems_bdbuf_execute_read_request (dd, bd, transfer_count);
        }
      }
      else
      {
        /* The block is out of range: disable the trigger for this device. */
        dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
      }
    }

    rtems_bdbuf_unlock_cache ();
  }

  rtems_task_exit();
}
3024
/**
 * Copy a consistent snapshot of the device statistics under the cache lock.
 *
 * @param dd The disk device.
 * @param stats Receives the statistics.
 */
void rtems_bdbuf_get_device_stats (const rtems_disk_device *dd,
                                   rtems_blkdev_stats      *stats)
{
  rtems_bdbuf_lock_cache ();
  *stats = dd->stats;
  rtems_bdbuf_unlock_cache ();
}
3032
/**
 * Zero the device statistics under the cache lock.
 *
 * @param dd The disk device.
 */
void rtems_bdbuf_reset_device_stats (rtems_disk_device *dd)
{
  rtems_bdbuf_lock_cache ();
  memset (&dd->stats, 0, sizeof(dd->stats));
  rtems_bdbuf_unlock_cache ();
}
Note: See TracBrowser for help on using the repository browser.