source: rtems/cpukit/libblock/src/bdbuf.c @ f004b2b8

Last change on this file since f004b2b8 was f004b2b8, checked in by Sebastian Huber <sebastian.huber@…>, on 10/02/18 at 08:22:15

Use rtems_task_exit()

Update #3530.
Update #3533.

1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
11 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (c) 2009, 2017 embedded brains GmbH.
23 */
24
25/**
26 * Set to 1 to enable debug tracing.
27 */
28#define RTEMS_BDBUF_TRACE 0
29
30#if HAVE_CONFIG_H
31#include "config.h"
32#endif
33#include <limits.h>
34#include <errno.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include <inttypes.h>
39#include <pthread.h>
40
41#include <rtems.h>
42#include <rtems/error.h>
43#include <rtems/thread.h>
44#include <rtems/score/assert.h>
45
46#include "rtems/bdbuf.h"
47
48#define BDBUF_INVALID_DEV NULL
49
50/*
51 * Simpler label for this file.
52 */
53#define bdbuf_config rtems_bdbuf_configuration
54
55/**
56 * Swapout transfer transaction data. This data is passed to a worker thread
57 * to handle the write phase of the transfer.
58 */
59typedef struct rtems_bdbuf_swapout_transfer
60{
61  rtems_chain_control   bds;         /**< The transfer list of BDs. */
62  rtems_disk_device    *dd;          /**< The device the transfer is for. */
63  bool                  syncing;     /**< True if this transfer is part of a sync. */
64  rtems_blkdev_request  write_req;   /**< The write request. */
65} rtems_bdbuf_swapout_transfer;
66
67/**
68 * Swapout worker thread. These are available to take processing from the
69 * main swapout thread and handle the I/O operation.
70 */
71typedef struct rtems_bdbuf_swapout_worker
72{
73  rtems_chain_node             link;     /**< The threads sit on a chain when
74                                          * idle. */
75  rtems_id                     id;       /**< The id of the task so we can wake
76                                          * it. */
77  bool                         enabled;  /**< The worker is enabled. */
78  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
79                                          * thread. */
80} rtems_bdbuf_swapout_worker;
81
82/**
83 * Buffer waiters synchronization.
84 */
85typedef struct rtems_bdbuf_waiters {
86  unsigned                 count;
87  rtems_condition_variable cond_var;
88} rtems_bdbuf_waiters;
89
90/**
91 * The BD buffer cache.
92 */
93typedef struct rtems_bdbuf_cache
94{
95  rtems_id            swapout;           /**< Swapout task ID */
96  bool                swapout_enabled;   /**< Swapout is only running if
97                                          * enabled. Set to false to kill the
98                                          * swap out task. It deletes itself. */
99  rtems_chain_control swapout_free_workers; /**< The work threads for the swapout
100                                             * task. */
101
102  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
103                                          * descriptors. */
104  void*               buffers;           /**< The buffer's memory. */
105  size_t              buffer_min_count;  /**< Number of minimum size buffers
106                                          * that fit the buffer memory. */
107  size_t              max_bds_per_group; /**< The number of BDs of minimum
108                                          * buffer size that fit in a group. */
109  uint32_t            flags;             /**< Configuration flags. */
110
111  rtems_mutex         lock;              /**< The cache lock. It locks all
112                                          * cache data, BD and lists. */
113  rtems_mutex         sync_lock;         /**< Sync calls block writes. */
114  bool                sync_active;       /**< True if a sync is active. */
115  rtems_id            sync_requester;    /**< The sync requester. */
116  rtems_disk_device  *sync_device;       /**< The device to sync, or
117                                          * BDBUF_INVALID_DEV if not a device
118                                          * sync. */
119
120  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
121                                          * root. There is only one. */
122  rtems_chain_control lru;               /**< Least recently used list */
123  rtems_chain_control modified;          /**< Modified buffers list */
124  rtems_chain_control sync;              /**< Buffers to sync list */
125
126  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
127                                          * ACCESS_CACHED, ACCESS_MODIFIED or
128                                          * ACCESS_EMPTY
129                                          * state. */
130  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
131                                          * state. */
132  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer when none is
133                                          * available. */
134
135  rtems_bdbuf_swapout_transfer *swapout_transfer;
136  rtems_bdbuf_swapout_worker *swapout_workers;
137
138  size_t              group_count;       /**< The number of groups. */
139  rtems_bdbuf_group*  groups;            /**< The groups. */
140  rtems_id            read_ahead_task;   /**< Read-ahead task */
141  rtems_chain_control read_ahead_chain;  /**< Read-ahead request chain */
142  bool                read_ahead_enabled; /**< Read-ahead enabled */
143  rtems_status_code   init_status;       /**< The initialization status */
144  pthread_once_t      once;
145} rtems_bdbuf_cache;
146
147typedef enum {
148  RTEMS_BDBUF_FATAL_CACHE_WAIT_2,
149  RTEMS_BDBUF_FATAL_CACHE_WAIT_TO,
150  RTEMS_BDBUF_FATAL_CACHE_WAKE,
151  RTEMS_BDBUF_FATAL_PREEMPT_DIS,
152  RTEMS_BDBUF_FATAL_PREEMPT_RST,
153  RTEMS_BDBUF_FATAL_RA_WAKE_UP,
154  RTEMS_BDBUF_FATAL_RECYCLE,
155  RTEMS_BDBUF_FATAL_SO_WAKE_1,
156  RTEMS_BDBUF_FATAL_SO_WAKE_2,
157  RTEMS_BDBUF_FATAL_STATE_0,
158  RTEMS_BDBUF_FATAL_STATE_2,
159  RTEMS_BDBUF_FATAL_STATE_4,
160  RTEMS_BDBUF_FATAL_STATE_5,
161  RTEMS_BDBUF_FATAL_STATE_6,
162  RTEMS_BDBUF_FATAL_STATE_7,
163  RTEMS_BDBUF_FATAL_STATE_8,
164  RTEMS_BDBUF_FATAL_STATE_9,
165  RTEMS_BDBUF_FATAL_STATE_10,
166  RTEMS_BDBUF_FATAL_STATE_11,
167  RTEMS_BDBUF_FATAL_SWAPOUT_RE,
168  RTEMS_BDBUF_FATAL_TREE_RM,
169  RTEMS_BDBUF_FATAL_WAIT_EVNT,
170  RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT
171} rtems_bdbuf_fatal_code;
172
173/**
174 * The events used in this code. These should be system events rather than
175 * application events.
176 */
177#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
178#define RTEMS_BDBUF_READ_AHEAD_WAKE_UP RTEMS_EVENT_1
179
180static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
181
182static rtems_task rtems_bdbuf_read_ahead_task(rtems_task_argument arg);
183
184/**
185 * The Buffer Descriptor cache.
186 */
187static rtems_bdbuf_cache bdbuf_cache = {
188  .lock = RTEMS_MUTEX_INITIALIZER(NULL),
189  .sync_lock = RTEMS_MUTEX_INITIALIZER(NULL),
190  .access_waiters = { .cond_var = RTEMS_CONDITION_VARIABLE_INITIALIZER(NULL) },
191  .transfer_waiters = {
192    .cond_var = RTEMS_CONDITION_VARIABLE_INITIALIZER(NULL)
193  },
194  .buffer_waiters = { .cond_var = RTEMS_CONDITION_VARIABLE_INITIALIZER(NULL) },
195  .once = PTHREAD_ONCE_INIT
196};
197
198#if RTEMS_BDBUF_TRACE
199/**
200 * If true output the trace message.
201 */
202bool rtems_bdbuf_tracer;
203
204/**
205 * Return the number of items on the list.
206 *
207 * @param list The chain control.
208 * @return uint32_t The number of items on the list.
209 */
210uint32_t
211rtems_bdbuf_list_count (rtems_chain_control* list)
212{
213  rtems_chain_node* node = rtems_chain_first (list);
214  uint32_t          count = 0;
215  while (!rtems_chain_is_tail (list, node))
216  {
217    count++;
218    node = rtems_chain_next (node);
219  }
220  return count;
221}
222
223/**
224 * Show the usage for the bdbuf cache.
225 */
226void
227rtems_bdbuf_show_usage (void)
228{
229  uint32_t group;
230  uint32_t total = 0;
231  uint32_t val;
232
233  for (group = 0; group < bdbuf_cache.group_count; group++)
234    total += bdbuf_cache.groups[group].users;
235  printf ("bdbuf:group users=%" PRIu32, total);
236  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
237  printf (", lru=%" PRIu32, val);
238  total = val;
239  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
240  printf (", mod=%" PRIu32, val);
241  total += val;
242  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
243  printf (", sync=%" PRIu32, val);
244  total += val;
245  printf (", total=%" PRIu32 "\n", total);
246}
247
248/**
249 * Show the users of the group a bd belongs to.
250 *
251 * @param where A label to show the context of output.
252 * @param bd The bd to show the users of.
253 */
254void
255rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
256{
257  const char* states[] =
258    { "FR", "EM", "CH", "AC", "AM", "AE", "AP", "MD", "SY", "TR", "TP" };
259
260  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
261          where,
262          bd->block, states[bd->state],
263          bd->group - bdbuf_cache.groups,
264          bd - bdbuf_cache.bds,
265          bd->group->users,
266          bd->group->users > 8 ? "<<<<<<<" : "");
267}
268#else
269#define rtems_bdbuf_tracer (0)
270#define rtems_bdbuf_show_usage() ((void) 0)
271#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
272#endif
273
274/**
275 * The default maximum height of 32 allows for AVL trees having between
276 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
277 * change this compile-time constant as you wish.
278 */
279#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
280#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
281#endif
282
283static void
284rtems_bdbuf_fatal (rtems_fatal_code error)
285{
286  rtems_fatal (RTEMS_FATAL_SOURCE_BDBUF, error);
287}
288
289static void
290rtems_bdbuf_fatal_with_state (rtems_bdbuf_buf_state state,
291                              rtems_bdbuf_fatal_code error)
292{
293  rtems_bdbuf_fatal ((((uint32_t) state) << 16) | error);
294}
295
296/**
297 * Searches for the node with specified dd/block.
298 *
299 * @param root pointer to the root node of the AVL-Tree
300 * @param dd disk device search key
301 * @param block block search key
302 * @retval NULL node with the specified dd/block is not found
303 * @return pointer to the node with specified dd/block
304 */
305static rtems_bdbuf_buffer *
306rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
307                        const rtems_disk_device *dd,
308                        rtems_blkdev_bnum    block)
309{
310  rtems_bdbuf_buffer* p = *root;
311
312  while ((p != NULL) && ((p->dd != dd) || (p->block != block)))
313  {
314    if (((uintptr_t) p->dd < (uintptr_t) dd)
315        || ((p->dd == dd) && (p->block < block)))
316    {
317      p = p->avl.right;
318    }
319    else
320    {
321      p = p->avl.left;
322    }
323  }
324
325  return p;
326}
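/*
 * Editorial note (not part of the original file): the tree is keyed on the
 * composite (dd, block) pair.  Nodes are ordered first by the numeric value of
 * the disk device pointer and then by the block number, which is the
 * comparison used by the search loop above and the insert/remove loops below.
 */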
327
328/**
329 * Inserts the specified node into the AVL tree.
330 *
331 * @param root pointer to the root node of the AVL-Tree
332 * @param node Pointer to the node to add.
333 * @retval 0 The node added successfully
334 * @retval -1 An error occurred
335 */
336static int
337rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
338                       rtems_bdbuf_buffer*  node)
339{
340  const rtems_disk_device *dd = node->dd;
341  rtems_blkdev_bnum block = node->block;
342
343  rtems_bdbuf_buffer*  p = *root;
344  rtems_bdbuf_buffer*  q;
345  rtems_bdbuf_buffer*  p1;
346  rtems_bdbuf_buffer*  p2;
347  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
348  rtems_bdbuf_buffer** buf_prev = buf_stack;
349
350  bool modified = false;
351
352  if (p == NULL)
353  {
354    *root = node;
355    node->avl.left = NULL;
356    node->avl.right = NULL;
357    node->avl.bal = 0;
358    return 0;
359  }
360
361  while (p != NULL)
362  {
363    *buf_prev++ = p;
364
365    if (((uintptr_t) p->dd < (uintptr_t) dd)
366        || ((p->dd == dd) && (p->block < block)))
367    {
368      p->avl.cache = 1;
369      q = p->avl.right;
370      if (q == NULL)
371      {
372        q = node;
373        p->avl.right = q = node;
374        break;
375      }
376    }
377    else if ((p->dd != dd) || (p->block != block))
378    {
379      p->avl.cache = -1;
380      q = p->avl.left;
381      if (q == NULL)
382      {
383        q = node;
384        p->avl.left = q;
385        break;
386      }
387    }
388    else
389    {
390      return -1;
391    }
392
393    p = q;
394  }
395
396  q->avl.left = q->avl.right = NULL;
397  q->avl.bal = 0;
398  modified = true;
399  buf_prev--;
400
401  while (modified)
402  {
403    if (p->avl.cache == -1)
404    {
405      switch (p->avl.bal)
406      {
407        case 1:
408          p->avl.bal = 0;
409          modified = false;
410          break;
411
412        case 0:
413          p->avl.bal = -1;
414          break;
415
416        case -1:
417          p1 = p->avl.left;
418          if (p1->avl.bal == -1) /* simple LL-turn */
419          {
420            p->avl.left = p1->avl.right;
421            p1->avl.right = p;
422            p->avl.bal = 0;
423            p = p1;
424          }
425          else /* double LR-turn */
426          {
427            p2 = p1->avl.right;
428            p1->avl.right = p2->avl.left;
429            p2->avl.left = p1;
430            p->avl.left = p2->avl.right;
431            p2->avl.right = p;
432            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
433            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
434            p = p2;
435          }
436          p->avl.bal = 0;
437          modified = false;
438          break;
439
440        default:
441          break;
442      }
443    }
444    else
445    {
446      switch (p->avl.bal)
447      {
448        case -1:
449          p->avl.bal = 0;
450          modified = false;
451          break;
452
453        case 0:
454          p->avl.bal = 1;
455          break;
456
457        case 1:
458          p1 = p->avl.right;
459          if (p1->avl.bal == 1) /* simple RR-turn */
460          {
461            p->avl.right = p1->avl.left;
462            p1->avl.left = p;
463            p->avl.bal = 0;
464            p = p1;
465          }
466          else /* double RL-turn */
467          {
468            p2 = p1->avl.left;
469            p1->avl.left = p2->avl.right;
470            p2->avl.right = p1;
471            p->avl.right = p2->avl.left;
472            p2->avl.left = p;
473            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
474            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
475            p = p2;
476          }
477          p->avl.bal = 0;
478          modified = false;
479          break;
480
481        default:
482          break;
483      }
484    }
485    q = p;
486    if (buf_prev > buf_stack)
487    {
488      p = *--buf_prev;
489
490      if (p->avl.cache == -1)
491      {
492        p->avl.left = q;
493      }
494      else
495      {
496        p->avl.right = q;
497      }
498    }
499    else
500    {
501      *root = p;
502      break;
503    }
504  };
505
506  return 0;
507}
508
509
510/**
511 * Removes the node from the tree.
512 *
513 * @param root Pointer to pointer to the root node
514 * @param node Pointer to the node to remove
515 * @retval 0 Item removed
516 * @retval -1 No such item found
517 */
518static int
519rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
520                       const rtems_bdbuf_buffer* node)
521{
522  const rtems_disk_device *dd = node->dd;
523  rtems_blkdev_bnum block = node->block;
524
525  rtems_bdbuf_buffer*  p = *root;
526  rtems_bdbuf_buffer*  q;
527  rtems_bdbuf_buffer*  r;
528  rtems_bdbuf_buffer*  s;
529  rtems_bdbuf_buffer*  p1;
530  rtems_bdbuf_buffer*  p2;
531  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
532  rtems_bdbuf_buffer** buf_prev = buf_stack;
533
534  bool modified = false;
535
536  memset (buf_stack, 0, sizeof(buf_stack));
537
538  while (p != NULL)
539  {
540    *buf_prev++ = p;
541
542    if (((uintptr_t) p->dd < (uintptr_t) dd)
543        || ((p->dd == dd) && (p->block < block)))
544    {
545      p->avl.cache = 1;
546      p = p->avl.right;
547    }
548    else if ((p->dd != dd) || (p->block != block))
549    {
550      p->avl.cache = -1;
551      p = p->avl.left;
552    }
553    else
554    {
555      /* node found */
556      break;
557    }
558  }
559
560  if (p == NULL)
561  {
562    /* there is no such node */
563    return -1;
564  }
565
566  q = p;
567
568  buf_prev--;
569  if (buf_prev > buf_stack)
570  {
571    p = *(buf_prev - 1);
572  }
573  else
574  {
575    p = NULL;
576  }
577
578  /* at this moment q is the node to delete and p is q's parent */
579  if (q->avl.right == NULL)
580  {
581    r = q->avl.left;
582    if (r != NULL)
583    {
584      r->avl.bal = 0;
585    }
586    q = r;
587  }
588  else
589  {
590    rtems_bdbuf_buffer **t;
591
592    r = q->avl.right;
593
594    if (r->avl.left == NULL)
595    {
596      r->avl.left = q->avl.left;
597      r->avl.bal = q->avl.bal;
598      r->avl.cache = 1;
599      *buf_prev++ = q = r;
600    }
601    else
602    {
603      t = buf_prev++;
604      s = r;
605
606      while (s->avl.left != NULL)
607      {
608        *buf_prev++ = r = s;
609        s = r->avl.left;
610        r->avl.cache = -1;
611      }
612
613      s->avl.left = q->avl.left;
614      r->avl.left = s->avl.right;
615      s->avl.right = q->avl.right;
616      s->avl.bal = q->avl.bal;
617      s->avl.cache = 1;
618
619      *t = q = s;
620    }
621  }
622
623  if (p != NULL)
624  {
625    if (p->avl.cache == -1)
626    {
627      p->avl.left = q;
628    }
629    else
630    {
631      p->avl.right = q;
632    }
633  }
634  else
635  {
636    *root = q;
637  }
638
639  modified = true;
640
641  while (modified)
642  {
643    if (buf_prev > buf_stack)
644    {
645      p = *--buf_prev;
646    }
647    else
648    {
649      break;
650    }
651
652    if (p->avl.cache == -1)
653    {
654      /* rebalance left branch */
655      switch (p->avl.bal)
656      {
657        case -1:
658          p->avl.bal = 0;
659          break;
660        case  0:
661          p->avl.bal = 1;
662          modified = false;
663          break;
664
665        case +1:
666          p1 = p->avl.right;
667
668          if (p1->avl.bal >= 0) /* simple RR-turn */
669          {
670            p->avl.right = p1->avl.left;
671            p1->avl.left = p;
672
673            if (p1->avl.bal == 0)
674            {
675              p1->avl.bal = -1;
676              modified = false;
677            }
678            else
679            {
680              p->avl.bal = 0;
681              p1->avl.bal = 0;
682            }
683            p = p1;
684          }
685          else /* double RL-turn */
686          {
687            p2 = p1->avl.left;
688
689            p1->avl.left = p2->avl.right;
690            p2->avl.right = p1;
691            p->avl.right = p2->avl.left;
692            p2->avl.left = p;
693
694            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
695            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;
696
697            p = p2;
698            p2->avl.bal = 0;
699          }
700          break;
701
702        default:
703          break;
704      }
705    }
706    else
707    {
708      /* rebalance right branch */
709      switch (p->avl.bal)
710      {
711        case +1:
712          p->avl.bal = 0;
713          break;
714
715        case  0:
716          p->avl.bal = -1;
717          modified = false;
718          break;
719
720        case -1:
721          p1 = p->avl.left;
722
723          if (p1->avl.bal <= 0) /* simple LL-turn */
724          {
725            p->avl.left = p1->avl.right;
726            p1->avl.right = p;
727            if (p1->avl.bal == 0)
728            {
729              p1->avl.bal = 1;
730              modified = false;
731            }
732            else
733            {
734              p->avl.bal = 0;
735              p1->avl.bal = 0;
736            }
737            p = p1;
738          }
739          else /* double LR-turn */
740          {
741            p2 = p1->avl.right;
742
743            p1->avl.right = p2->avl.left;
744            p2->avl.left = p1;
745            p->avl.left = p2->avl.right;
746            p2->avl.right = p;
747
748            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
749            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
750
751            p = p2;
752            p2->avl.bal = 0;
753          }
754          break;
755
756        default:
757          break;
758      }
759    }
760
761    if (buf_prev > buf_stack)
762    {
763      q = *(buf_prev - 1);
764
765      if (q->avl.cache == -1)
766      {
767        q->avl.left = p;
768      }
769      else
770      {
771        q->avl.right = p;
772      }
773    }
774    else
775    {
776      *root = p;
777      break;
778    }
779
780  }
781
782  return 0;
783}
784
785static void
786rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
787{
788  bd->state = state;
789}
790
791static rtems_blkdev_bnum
792rtems_bdbuf_media_block (const rtems_disk_device *dd, rtems_blkdev_bnum block)
793{
794  if (dd->block_to_media_block_shift >= 0)
795    return block << dd->block_to_media_block_shift;
796  else
797    /*
798     * Convert the block number in units of the block size to the block number in
799     * units of the media block size. We have to use 64-bit maths; no shortcut here.
800     */
801    return (rtems_blkdev_bnum)
802      ((((uint64_t) block) * dd->block_size) / dd->media_block_size);
803}
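/*
 * Worked example (editorial sketch, the sizes are hypothetical): with a 1024
 * byte block size on a 512 byte media block size the shift path applies
 * (block_to_media_block_shift == 1), so block 10 maps to media block 20.  With
 * a 1536 byte block size on the same media the shift is not usable and the
 * 64-bit division path gives (10 * 1536) / 512 == media block 30.
 */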
804
805/**
806 * Lock the mutex. A single task can nest calls.
807 *
808 * @param lock The mutex to lock.
809 */
810static void
811rtems_bdbuf_lock (rtems_mutex *lock)
812{
813  rtems_mutex_lock (lock);
814}
815
816/**
817 * Unlock the mutex.
818 *
819 * @param lock The mutex to unlock.
820 */
821static void
822rtems_bdbuf_unlock (rtems_mutex *lock)
823{
824  rtems_mutex_unlock (lock);
825}
826
827/**
828 * Lock the cache. A single task can nest calls.
829 */
830static void
831rtems_bdbuf_lock_cache (void)
832{
833  rtems_bdbuf_lock (&bdbuf_cache.lock);
834}
835
836/**
837 * Unlock the cache.
838 */
839static void
840rtems_bdbuf_unlock_cache (void)
841{
842  rtems_bdbuf_unlock (&bdbuf_cache.lock);
843}
844
845/**
846 * Lock the cache's sync. A single task can nest calls.
847 */
848static void
849rtems_bdbuf_lock_sync (void)
850{
851  rtems_bdbuf_lock (&bdbuf_cache.sync_lock);
852}
853
854/**
855 * Unlock the cache's sync lock. Any blocked writers are woken.
856 */
857static void
858rtems_bdbuf_unlock_sync (void)
859{
860  rtems_bdbuf_unlock (&bdbuf_cache.sync_lock);
861}
862
863static void
864rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
865{
866  ++bd->group->users;
867}
868
869static void
870rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
871{
872  --bd->group->users;
873}
874
875/**
876 * Wait until woken. A condition variable is used so a number of tasks can
877 * wait and all can be woken at once. Task events would require us to maintain
878 * a list of tasks to be woken, this would require storage, and we do not
879 * know the number of tasks that could be waiting.
880 *
881 * The wait releases the cache lock and blocks atomically, so a wake issued
882 * after we decide to wait but before we actually block is not lost. The lock
883 * is re-acquired before the wait returns.
884 *
885 * A counter is used to avoid the broadcast call when no one is waiting.
886 *
887 * The function assumes the cache is locked on entry and it will be locked on
888 * exit.
889 */
890static void
891rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
892{
893  /*
894   * Indicate we are waiting.
895   */
896  ++waiters->count;
897
898  rtems_condition_variable_wait (&waiters->cond_var, &bdbuf_cache.lock);
899
900  --waiters->count;
901}
902
903static void
904rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
905{
906  rtems_bdbuf_group_obtain (bd);
907  ++bd->waiters;
908  rtems_bdbuf_anonymous_wait (waiters);
909  --bd->waiters;
910  rtems_bdbuf_group_release (bd);
911}
912
913/**
914 * Wake the tasks blocked on a waiter group. The group has a counter that lets
915 * us know if there are any waiters.
916 */
917static void
918rtems_bdbuf_wake (rtems_bdbuf_waiters *waiters)
919{
920  if (waiters->count > 0)
921  {
922    rtems_condition_variable_broadcast (&waiters->cond_var);
923  }
924}
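/*
 * Editorial sketch of the wait/wake protocol used throughout this file.  The
 * helpers named below are the real ones; the condition is a placeholder:
 *
 *   rtems_bdbuf_lock_cache ();
 *   while (!condition_of_interest)
 *     rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
 *   ... act on the cache while still holding the lock ...
 *   rtems_bdbuf_unlock_cache ();
 *
 * The wait releases the cache lock and blocks atomically and re-acquires the
 * lock before returning, and rtems_bdbuf_wake() broadcasts only when the
 * waiter count is non-zero, so the common no-waiter case costs one compare.
 */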
925
926static void
927rtems_bdbuf_wake_swapper (void)
928{
929  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
930                                           RTEMS_BDBUF_SWAPOUT_SYNC);
931  if (sc != RTEMS_SUCCESSFUL)
932    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_1);
933}
934
935static bool
936rtems_bdbuf_has_buffer_waiters (void)
937{
938  return bdbuf_cache.buffer_waiters.count;
939}
940
941static void
942rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
943{
944  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
945    rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_TREE_RM);
946}
947
948static void
949rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
950{
951  switch (bd->state)
952  {
953    case RTEMS_BDBUF_STATE_FREE:
954      break;
955    case RTEMS_BDBUF_STATE_CACHED:
956      rtems_bdbuf_remove_from_tree (bd);
957      break;
958    default:
959      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_10);
960  }
961
962  rtems_chain_extract_unprotected (&bd->link);
963}
964
965static void
966rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
967{
968  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
969  rtems_chain_prepend_unprotected (&bdbuf_cache.lru, &bd->link);
970}
971
972static void
973rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
974{
975  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
976}
977
978static void
979rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
980{
981  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
982  rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
983}
984
985static void
986rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
987{
988  rtems_bdbuf_make_empty (bd);
989
990  if (bd->waiters == 0)
991  {
992    rtems_bdbuf_remove_from_tree (bd);
993    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
994  }
995}
996
997static void
998rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
999{
1000  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dd)
1001  {
1002    rtems_bdbuf_unlock_cache ();
1003
1004    /*
1005     * Wait for the sync lock.
1006     */
1007    rtems_bdbuf_lock_sync ();
1008
1009    rtems_bdbuf_unlock_sync ();
1010    rtems_bdbuf_lock_cache ();
1011  }
1012
1013  /*
1014   * Only the first modified release sets the timer; further user accesses do
1015   * not change the timer value, which should count down. This assumes the
1016   * user's hold of the buffer is much shorter than the time on the modified
1017   * list. Resetting the timer on each access could result in a buffer never
1018   * reaching 0 and never being forced onto disk. This raises a difficult
1019   * question: is a snapshot of a changing block better than writing nothing?
1020   * We have tended to think changes should be held for only a specific period
1021   * of time, even if still changing, and then written to disk, letting the
1022   * file system try to recover from this position if it can.
1023   */
1024  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
1025        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
1026    bd->hold_timer = bdbuf_config.swap_block_hold;
1027
1028  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
1029  rtems_chain_append_unprotected (&bdbuf_cache.modified, &bd->link);
1030
1031  if (bd->waiters)
1032    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1033  else if (rtems_bdbuf_has_buffer_waiters ())
1034    rtems_bdbuf_wake_swapper ();
1035}
1036
1037static void
1038rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
1039{
1040  rtems_bdbuf_group_release (bd);
1041  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
1042
1043  if (bd->waiters)
1044    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1045  else
1046    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1047}
1048
1049/**
1050 * Compute the number of BDs per group for a given buffer size.
1051 *
1052 * @param size The buffer size. It can be any size and we scale up.
1053 */
1054static size_t
1055rtems_bdbuf_bds_per_group (size_t size)
1056{
1057  size_t bufs_per_size;
1058  size_t bds_per_size;
1059
1060  if (size > bdbuf_config.buffer_max)
1061    return 0;
1062
1063  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1064
1065  for (bds_per_size = 1;
1066       bds_per_size < bufs_per_size;
1067       bds_per_size <<= 1)
1068    ;
1069
1070  return bdbuf_cache.max_bds_per_group / bds_per_size;
1071}
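/*
 * Worked example (editorial sketch, the configuration values are
 * hypothetical): with buffer_min == 512 and buffer_max == 4096 each group has
 * 8 BDs of minimum size.  A request for a 1536 byte buffer needs 3 minimum
 * buffers, which is rounded up to the next power of two (4), so the function
 * returns 8 / 4 == 2 BDs per group.  A size above buffer_max returns 0.
 */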
1072
1073static void
1074rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
1075{
1076  rtems_bdbuf_group_release (bd);
1077  rtems_bdbuf_discard_buffer (bd);
1078
1079  if (bd->waiters)
1080    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1081  else
1082    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1083}
1084
1085/**
1086 * Reallocate a group. The BDs currently allocated in the group are removed
1087 * from the AVL tree and any lists, then the new BDs are prepended to the LRU
1088 * list of the cache.
1089 *
1090 * @param group The group to reallocate.
1091 * @param new_bds_per_group The new count of BDs per group.
1092 * @return A buffer of this group.
1093 */
1094static rtems_bdbuf_buffer *
1095rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1096{
1097  rtems_bdbuf_buffer* bd;
1098  size_t              b;
1099  size_t              bufs_per_bd;
1100
1101  if (rtems_bdbuf_tracer)
1102    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1103            group - bdbuf_cache.groups, group->bds_per_group,
1104            new_bds_per_group);
1105
1106  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1107
1108  for (b = 0, bd = group->bdbuf;
1109       b < group->bds_per_group;
1110       b++, bd += bufs_per_bd)
1111    rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1112
1113  group->bds_per_group = new_bds_per_group;
1114  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1115
1116  for (b = 1, bd = group->bdbuf + bufs_per_bd;
1117       b < group->bds_per_group;
1118       b++, bd += bufs_per_bd)
1119    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1120
1121  if (b > 1)
1122    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1123
1124  return group->bdbuf;
1125}
1126
1127static void
1128rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
1129                                rtems_disk_device  *dd,
1130                                rtems_blkdev_bnum   block)
1131{
1132  bd->dd        = dd;
1133  bd->block     = block;
1134  bd->avl.left  = NULL;
1135  bd->avl.right = NULL;
1136  bd->waiters   = 0;
1137
1138  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1139    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RECYCLE);
1140
1141  rtems_bdbuf_make_empty (bd);
1142}
1143
1144static rtems_bdbuf_buffer *
1145rtems_bdbuf_get_buffer_from_lru_list (rtems_disk_device *dd,
1146                                      rtems_blkdev_bnum  block)
1147{
1148  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);
1149
1150  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
1151  {
1152    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
1153    rtems_bdbuf_buffer *empty_bd = NULL;
1154
1155    if (rtems_bdbuf_tracer)
1156      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
1157              bd - bdbuf_cache.bds,
1158              bd->group - bdbuf_cache.groups, bd->group->users,
1159              bd->group->bds_per_group, dd->bds_per_group);
1160
1161    /*
1162     * If nobody waits for this BD, we may recycle it.
1163     */
1164    if (bd->waiters == 0)
1165    {
1166      if (bd->group->bds_per_group == dd->bds_per_group)
1167      {
1168        rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1169
1170        empty_bd = bd;
1171      }
1172      else if (bd->group->users == 0)
1173        empty_bd = rtems_bdbuf_group_realloc (bd->group, dd->bds_per_group);
1174    }
1175
1176    if (empty_bd != NULL)
1177    {
1178      rtems_bdbuf_setup_empty_buffer (empty_bd, dd, block);
1179
1180      return empty_bd;
1181    }
1182
1183    node = rtems_chain_next (node);
1184  }
1185
1186  return NULL;
1187}
1188
1189static rtems_status_code
1190rtems_bdbuf_create_task(
1191  rtems_name name,
1192  rtems_task_priority priority,
1193  rtems_task_priority default_priority,
1194  rtems_id *id
1195)
1196{
1197  rtems_status_code sc;
1198  size_t stack_size = bdbuf_config.task_stack_size ?
1199    bdbuf_config.task_stack_size : RTEMS_BDBUF_TASK_STACK_SIZE_DEFAULT;
1200
1201  priority = priority != 0 ? priority : default_priority;
1202
1203  sc = rtems_task_create (name,
1204                          priority,
1205                          stack_size,
1206                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1207                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1208                          id);
1209
1210  return sc;
1211}
1212
1213static rtems_bdbuf_swapout_transfer*
1214rtems_bdbuf_swapout_transfer_alloc (void)
1215{
1216  /*
1217   * @note chrisj The rtems_blkdev_request and the array at the end are a hack.
1218   * I am disappointed to find code like this in RTEMS. The request should
1219   * have been a rtems_chain_control: simple, fast, and less storage, as the
1220   * node is already part of the buffer structure.
1221   */
1222  size_t transfer_size = sizeof (rtems_bdbuf_swapout_transfer)
1223    + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
1224  return calloc (1, transfer_size);
1225}
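/*
 * Editorial note (not part of the original file): the allocation covers the
 * transfer structure plus max_write_blocks scatter/gather entries.  The extra
 * entries are reached through the flexible bufs[] array at the end of the
 * embedded rtems_blkdev_request, which is the layout the note above calls a
 * hack.
 */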
1226
1227static void
1228rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status);
1229
1230static void
1231rtems_bdbuf_swapout_transfer_init (rtems_bdbuf_swapout_transfer* transfer,
1232                                   rtems_id id)
1233{
1234  rtems_chain_initialize_empty (&transfer->bds);
1235  transfer->dd = BDBUF_INVALID_DEV;
1236  transfer->syncing = false;
1237  transfer->write_req.req = RTEMS_BLKDEV_REQ_WRITE;
1238  transfer->write_req.done = rtems_bdbuf_transfer_done;
1239  transfer->write_req.io_task = id;
1240}
1241
1242static size_t
1243rtems_bdbuf_swapout_worker_size (void)
1244{
1245  return sizeof (rtems_bdbuf_swapout_worker)
1246    + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
1247}
1248
1249static rtems_task
1250rtems_bdbuf_swapout_worker_task (rtems_task_argument arg);
1251
1252static rtems_status_code
1253rtems_bdbuf_swapout_workers_create (void)
1254{
1255  rtems_status_code  sc;
1256  size_t             w;
1257  size_t             worker_size;
1258  char              *worker_current;
1259
1260  worker_size = rtems_bdbuf_swapout_worker_size ();
1261  worker_current = calloc (1, bdbuf_config.swapout_workers * worker_size);
1262  sc = worker_current != NULL ? RTEMS_SUCCESSFUL : RTEMS_NO_MEMORY;
1263
1264  bdbuf_cache.swapout_workers = (rtems_bdbuf_swapout_worker *) worker_current;
1265
1266  for (w = 0;
1267       sc == RTEMS_SUCCESSFUL && w < bdbuf_config.swapout_workers;
1268       w++, worker_current += worker_size)
1269  {
1270    rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;
1271
1272    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'D', 'o', 'a' + w),
1273                                  bdbuf_config.swapout_worker_priority,
1274                                  RTEMS_BDBUF_SWAPOUT_WORKER_TASK_PRIORITY_DEFAULT,
1275                                  &worker->id);
1276    if (sc == RTEMS_SUCCESSFUL)
1277    {
1278      rtems_bdbuf_swapout_transfer_init (&worker->transfer, worker->id);
1279
1280      rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);
1281      worker->enabled = true;
1282
1283      sc = rtems_task_start (worker->id,
1284                             rtems_bdbuf_swapout_worker_task,
1285                             (rtems_task_argument) worker);
1286    }
1287  }
1288
1289  return sc;
1290}
1291
1292static size_t
1293rtems_bdbuf_read_request_size (uint32_t transfer_count)
1294{
1295  return sizeof (rtems_blkdev_request)
1296    + sizeof (rtems_blkdev_sg_buffer) * transfer_count;
1297}
1298
1299static rtems_status_code
1300rtems_bdbuf_do_init (void)
1301{
1302  rtems_bdbuf_group*  group;
1303  rtems_bdbuf_buffer* bd;
1304  uint8_t*            buffer;
1305  size_t              b;
1306  rtems_status_code   sc;
1307
1308  if (rtems_bdbuf_tracer)
1309    printf ("bdbuf:init\n");
1310
1311  if (rtems_interrupt_is_in_progress())
1312    return RTEMS_CALLED_FROM_ISR;
1313
1314  /*
1315   * Check the configuration table values.
1316   */
1317
1318  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
1319    return RTEMS_INVALID_NUMBER;
1320
1321  if (rtems_bdbuf_read_request_size (bdbuf_config.max_read_ahead_blocks)
1322      > RTEMS_MINIMUM_STACK_SIZE / 8U)
1323    return RTEMS_INVALID_NUMBER;
1324
1325  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;
1326
1327  rtems_chain_initialize_empty (&bdbuf_cache.swapout_free_workers);
1328  rtems_chain_initialize_empty (&bdbuf_cache.lru);
1329  rtems_chain_initialize_empty (&bdbuf_cache.modified);
1330  rtems_chain_initialize_empty (&bdbuf_cache.sync);
1331  rtems_chain_initialize_empty (&bdbuf_cache.read_ahead_chain);
1332
1333  rtems_mutex_set_name (&bdbuf_cache.lock, "bdbuf lock");
1334  rtems_mutex_set_name (&bdbuf_cache.sync_lock, "bdbuf sync lock");
1335  rtems_condition_variable_set_name (&bdbuf_cache.access_waiters.cond_var,
1336                                     "bdbuf access");
1337  rtems_condition_variable_set_name (&bdbuf_cache.transfer_waiters.cond_var,
1338                                     "bdbuf transfer");
1339  rtems_condition_variable_set_name (&bdbuf_cache.buffer_waiters.cond_var,
1340                                     "bdbuf buffer");
1341
1342  rtems_bdbuf_lock_cache ();
1343
1344  /*
1345   * Compute the various number of elements in the cache.
1346   */
1347  bdbuf_cache.buffer_min_count =
1348    bdbuf_config.size / bdbuf_config.buffer_min;
1349  bdbuf_cache.max_bds_per_group =
1350    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
1351  bdbuf_cache.group_count =
1352    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;
1353
1354  /*
1355   * Allocate the memory for the buffer descriptors.
1356   */
1357  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
1358                            bdbuf_cache.buffer_min_count);
1359  if (!bdbuf_cache.bds)
1360    goto error;
1361
1362  /*
1363   * Allocate the memory for the groups.
1364   */
1365  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
1366                               bdbuf_cache.group_count);
1367  if (!bdbuf_cache.groups)
1368    goto error;
1369
1370  /*
1371   * Allocate the buffer memory. The buffer memory will be cache
1372   * aligned. It is possible to free the memory allocated by
1373   * rtems_cache_aligned_malloc() with free().
1374   */
1375  bdbuf_cache.buffers = rtems_cache_aligned_malloc(bdbuf_cache.buffer_min_count
1376                                                   * bdbuf_config.buffer_min);
1377  if (bdbuf_cache.buffers == NULL)
1378    goto error;
1379
1380  /*
1381   * The cache is empty after opening so we need to add all the buffers to it
1382   * and initialise the groups.
1383   */
1384  for (b = 0, group = bdbuf_cache.groups,
1385         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
1386       b < bdbuf_cache.buffer_min_count;
1387       b++, bd++, buffer += bdbuf_config.buffer_min)
1388  {
1389    bd->dd    = BDBUF_INVALID_DEV;
1390    bd->group  = group;
1391    bd->buffer = buffer;
1392
1393    rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
1394
1395    if ((b % bdbuf_cache.max_bds_per_group) ==
1396        (bdbuf_cache.max_bds_per_group - 1))
1397      group++;
1398  }
1399
1400  for (b = 0,
1401         group = bdbuf_cache.groups,
1402         bd = bdbuf_cache.bds;
1403       b < bdbuf_cache.group_count;
1404       b++,
1405         group++,
1406         bd += bdbuf_cache.max_bds_per_group)
1407  {
1408    group->bds_per_group = bdbuf_cache.max_bds_per_group;
1409    group->bdbuf = bd;
1410  }
1411
1412  /*
1413   * Create and start swapout task.
1414   */
1415
1416  bdbuf_cache.swapout_transfer = rtems_bdbuf_swapout_transfer_alloc ();
1417  if (!bdbuf_cache.swapout_transfer)
1418    goto error;
1419
1420  bdbuf_cache.swapout_enabled = true;
1421
1422  sc = rtems_bdbuf_create_task (rtems_build_name('B', 'S', 'W', 'P'),
1423                                bdbuf_config.swapout_priority,
1424                                RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
1425                                &bdbuf_cache.swapout);
1426  if (sc != RTEMS_SUCCESSFUL)
1427    goto error;
1428
1429  rtems_bdbuf_swapout_transfer_init (bdbuf_cache.swapout_transfer, bdbuf_cache.swapout);
1430
1431  sc = rtems_task_start (bdbuf_cache.swapout,
1432                         rtems_bdbuf_swapout_task,
1433                         (rtems_task_argument) bdbuf_cache.swapout_transfer);
1434  if (sc != RTEMS_SUCCESSFUL)
1435    goto error;
1436
1437  if (bdbuf_config.swapout_workers > 0)
1438  {
1439    sc = rtems_bdbuf_swapout_workers_create ();
1440    if (sc != RTEMS_SUCCESSFUL)
1441      goto error;
1442  }
1443
1444  if (bdbuf_config.max_read_ahead_blocks > 0)
1445  {
1446    bdbuf_cache.read_ahead_enabled = true;
1447    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'R', 'D', 'A'),
1448                                  bdbuf_config.read_ahead_priority,
1449                                  RTEMS_BDBUF_READ_AHEAD_TASK_PRIORITY_DEFAULT,
1450                                  &bdbuf_cache.read_ahead_task);
1451    if (sc != RTEMS_SUCCESSFUL)
1452      goto error;
1453
1454    sc = rtems_task_start (bdbuf_cache.read_ahead_task,
1455                           rtems_bdbuf_read_ahead_task,
1456                           0);
1457    if (sc != RTEMS_SUCCESSFUL)
1458      goto error;
1459  }
1460
1461  rtems_bdbuf_unlock_cache ();
1462
1463  return RTEMS_SUCCESSFUL;
1464
1465error:
1466
1467  if (bdbuf_cache.read_ahead_task != 0)
1468    rtems_task_delete (bdbuf_cache.read_ahead_task);
1469
1470  if (bdbuf_cache.swapout != 0)
1471    rtems_task_delete (bdbuf_cache.swapout);
1472
1473  if (bdbuf_cache.swapout_workers)
1474  {
1475    char   *worker_current = (char *) bdbuf_cache.swapout_workers;
1476    size_t  worker_size = rtems_bdbuf_swapout_worker_size ();
1477    size_t  w;
1478
1479    for (w = 0;
1480         w < bdbuf_config.swapout_workers;
1481         w++, worker_current += worker_size)
1482    {
1483      rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;
1484
1485      if (worker->id != 0) {
1486        rtems_task_delete (worker->id);
1487      }
1488    }
1489  }
1490
1491  free (bdbuf_cache.buffers);
1492  free (bdbuf_cache.groups);
1493  free (bdbuf_cache.bds);
1494  free (bdbuf_cache.swapout_transfer);
1495  free (bdbuf_cache.swapout_workers);
1496
1497  rtems_bdbuf_unlock_cache ();
1498
1499  return RTEMS_UNSATISFIED;
1500}
1501
1502static void
1503rtems_bdbuf_init_once (void)
1504{
1505  bdbuf_cache.init_status = rtems_bdbuf_do_init();
1506}
1507
1508rtems_status_code
1509rtems_bdbuf_init (void)
1510{
1511  int eno;
1512
1513  eno = pthread_once (&bdbuf_cache.once, rtems_bdbuf_init_once);
1514  _Assert (eno == 0);
1515  (void) eno;
1516
1517  return bdbuf_cache.init_status;
1518}
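/*
 * Editorial note (not part of the original file): because initialization is
 * funneled through pthread_once(), rtems_bdbuf_init() may be called more than
 * once and from several tasks; every caller receives the status recorded by
 * the single rtems_bdbuf_do_init() run.
 */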
1519
1520static void
1521rtems_bdbuf_wait_for_event (rtems_event_set event)
1522{
1523  rtems_status_code sc = RTEMS_SUCCESSFUL;
1524  rtems_event_set   out = 0;
1525
1526  sc = rtems_event_receive (event,
1527                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1528                            RTEMS_NO_TIMEOUT,
1529                            &out);
1530
1531  if (sc != RTEMS_SUCCESSFUL || out != event)
1532    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_EVNT);
1533}
1534
1535static void
1536rtems_bdbuf_wait_for_transient_event (void)
1537{
1538  rtems_status_code sc = RTEMS_SUCCESSFUL;
1539
1540  sc = rtems_event_transient_receive (RTEMS_WAIT, RTEMS_NO_TIMEOUT);
1541  if (sc != RTEMS_SUCCESSFUL)
1542    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT);
1543}
1544
1545static void
1546rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
1547{
1548  while (true)
1549  {
1550    switch (bd->state)
1551    {
1552      case RTEMS_BDBUF_STATE_MODIFIED:
1553        rtems_bdbuf_group_release (bd);
1554        /* Fall through */
1555      case RTEMS_BDBUF_STATE_CACHED:
1556        rtems_chain_extract_unprotected (&bd->link);
1557        /* Fall through */
1558      case RTEMS_BDBUF_STATE_EMPTY:
1559        return;
1560      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1561      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1562      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1563      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1564        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1565        break;
1566      case RTEMS_BDBUF_STATE_SYNC:
1567      case RTEMS_BDBUF_STATE_TRANSFER:
1568      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1569        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1570        break;
1571      default:
1572        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_7);
1573    }
1574  }
1575}
1576
1577static void
1578rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
1579{
1580  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
1581  rtems_chain_extract_unprotected (&bd->link);
1582  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
1583  rtems_bdbuf_wake_swapper ();
1584}
1585
1586/**
1587 * @brief Waits until the buffer is ready for recycling.
1588 *
1589 * @retval @c true Buffer is valid and may be recycled.
1590 * @retval @c false Buffer is invalid and has to be searched for again.
1591 */
1592static bool
1593rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
1594{
1595  while (true)
1596  {
1597    switch (bd->state)
1598    {
1599      case RTEMS_BDBUF_STATE_FREE:
1600        return true;
1601      case RTEMS_BDBUF_STATE_MODIFIED:
1602        rtems_bdbuf_request_sync_for_modified_buffer (bd);
1603        break;
1604      case RTEMS_BDBUF_STATE_CACHED:
1605      case RTEMS_BDBUF_STATE_EMPTY:
1606        if (bd->waiters == 0)
1607          return true;
1608        else
1609        {
1610          /*
1611           * It is essential that we wait here without a special wait count and
1612           * without the group in use.  Otherwise we could trigger a wait ping
1613           * pong with another recycle waiter.  The state of the buffer is
1614           * arbitrary afterwards.
1615           */
1616          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1617          return false;
1618        }
1619      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1620      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1621      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1622      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1623        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1624        break;
1625      case RTEMS_BDBUF_STATE_SYNC:
1626      case RTEMS_BDBUF_STATE_TRANSFER:
1627      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1628        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1629        break;
1630      default:
1631        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_8);
1632    }
1633  }
1634}
1635
1636static void
1637rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
1638{
1639  while (true)
1640  {
1641    switch (bd->state)
1642    {
1643      case RTEMS_BDBUF_STATE_CACHED:
1644      case RTEMS_BDBUF_STATE_EMPTY:
1645      case RTEMS_BDBUF_STATE_MODIFIED:
1646      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1647      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1648      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1649      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1650        return;
1651      case RTEMS_BDBUF_STATE_SYNC:
1652      case RTEMS_BDBUF_STATE_TRANSFER:
1653      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1654        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1655        break;
1656      default:
1657        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_9);
1658    }
1659  }
1660}
1661
1662static void
1663rtems_bdbuf_wait_for_buffer (void)
1664{
1665  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
1666    rtems_bdbuf_wake_swapper ();
1667
1668  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1669}
1670
1671static void
1672rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
1673{
1674  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
1675
1676  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
1677
1678  if (bd->waiters)
1679    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1680
1681  rtems_bdbuf_wake_swapper ();
1682  rtems_bdbuf_wait_for_sync_done (bd);
1683
1684  /*
1685   * We may have created a cached or empty buffer which may be recycled.
1686   */
1687  if (bd->waiters == 0
1688        && (bd->state == RTEMS_BDBUF_STATE_CACHED
1689          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
1690  {
1691    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
1692    {
1693      rtems_bdbuf_remove_from_tree (bd);
1694      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1695    }
1696    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1697  }
1698}
1699
1700static rtems_bdbuf_buffer *
1701rtems_bdbuf_get_buffer_for_read_ahead (rtems_disk_device *dd,
1702                                       rtems_blkdev_bnum  block)
1703{
1704  rtems_bdbuf_buffer *bd = NULL;
1705
1706  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1707
1708  if (bd == NULL)
1709  {
1710    bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);
1711
1712    if (bd != NULL)
1713      rtems_bdbuf_group_obtain (bd);
1714  }
1715  else
1716    /*
1717     * The buffer is in the cache, so it is already available or in use, and
1718     * thus there is no need for a read ahead.
1719     */
1720    bd = NULL;
1721
1722  return bd;
1723}
1724
1725static rtems_bdbuf_buffer *
1726rtems_bdbuf_get_buffer_for_access (rtems_disk_device *dd,
1727                                   rtems_blkdev_bnum  block)
1728{
1729  rtems_bdbuf_buffer *bd = NULL;
1730
1731  do
1732  {
1733    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1734
1735    if (bd != NULL)
1736    {
1737      if (bd->group->bds_per_group != dd->bds_per_group)
1738      {
1739        if (rtems_bdbuf_wait_for_recycle (bd))
1740        {
1741          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1742          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1743          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1744        }
1745        bd = NULL;
1746      }
1747    }
1748    else
1749    {
1750      bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);
1751
1752      if (bd == NULL)
1753        rtems_bdbuf_wait_for_buffer ();
1754    }
1755  }
1756  while (bd == NULL);
1757
1758  rtems_bdbuf_wait_for_access (bd);
1759  rtems_bdbuf_group_obtain (bd);
1760
1761  return bd;
1762}
1763
1764static rtems_status_code
1765rtems_bdbuf_get_media_block (const rtems_disk_device *dd,
1766                             rtems_blkdev_bnum        block,
1767                             rtems_blkdev_bnum       *media_block_ptr)
1768{
1769  rtems_status_code sc = RTEMS_SUCCESSFUL;
1770
1771  if (block < dd->block_count)
1772  {
1773    /*
1774     * Compute the media block number. Drivers work with the media block
1775     * number, not the block number a BD may have, as the latter depends on
1776     * the block size set by the user.
1777     */
1778    *media_block_ptr = rtems_bdbuf_media_block (dd, block) + dd->start;
1779  }
1780  else
1781  {
1782    sc = RTEMS_INVALID_ID;
1783  }
1784
1785  return sc;
1786}
1787
1788rtems_status_code
1789rtems_bdbuf_get (rtems_disk_device   *dd,
1790                 rtems_blkdev_bnum    block,
1791                 rtems_bdbuf_buffer **bd_ptr)
1792{
1793  rtems_status_code   sc = RTEMS_SUCCESSFUL;
1794  rtems_bdbuf_buffer *bd = NULL;
1795  rtems_blkdev_bnum   media_block;
1796
1797  rtems_bdbuf_lock_cache ();
1798
1799  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
1800  if (sc == RTEMS_SUCCESSFUL)
1801  {
1802    /*
1803     * Print the block index relative to the physical disk.
1804     */
1805    if (rtems_bdbuf_tracer)
1806      printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
1807              media_block, block, (unsigned) dd->dev);
1808
1809    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);
1810
1811    switch (bd->state)
1812    {
1813      case RTEMS_BDBUF_STATE_CACHED:
1814        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
1815        break;
1816      case RTEMS_BDBUF_STATE_EMPTY:
1817        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
1818        break;
1819      case RTEMS_BDBUF_STATE_MODIFIED:
1820        /*
1821         * Getting a modified buffer could be considered a bug in the caller
1822         * because you should not be getting an already modified buffer. However,
1823         * the user may have modified a byte in a block, then decided to seek to
1824         * the start and write the whole block; the file system has no record of
1825         * this, so it just gets the block to fill.
1826         */
1827        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
1828        break;
1829      default:
1830        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_2);
1831        break;
1832    }
1833
1834    if (rtems_bdbuf_tracer)
1835    {
1836      rtems_bdbuf_show_users ("get", bd);
1837      rtems_bdbuf_show_usage ();
1838    }
1839  }
1840
1841  rtems_bdbuf_unlock_cache ();
1842
1843  *bd_ptr = bd;
1844
1845  return sc;
1846}
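/*
 * Editorial usage sketch (not part of the original file): a caller that
 * overwrites a whole block typically pairs rtems_bdbuf_get() with
 * rtems_bdbuf_release_modified().  The helper name and error handling below
 * are illustrative only.
 */
#if 0
static rtems_status_code
example_overwrite_block (rtems_disk_device *dd,
                         rtems_blkdev_bnum  block,
                         const void        *data)
{
  rtems_bdbuf_buffer *bd;
  rtems_status_code   sc;

  sc = rtems_bdbuf_get (dd, block, &bd);
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  /* The buffer contents are undefined unless the block was already cached */
  memcpy (bd->buffer, data, dd->block_size);

  /* Hand the buffer back to the cache and schedule it for write-out */
  return rtems_bdbuf_release_modified (bd);
}
#endif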
1847
1848/**
1849 * Callback handler called by the low level driver when the transfer has
1850 * completed. This function may be invoked from an interrupt handler.
1851 *
1852 * @param req Arbitrary argument specified in the block device request
1853 *            structure (in this case, a pointer to the appropriate
1854 *            block device request structure).
1855 * @param status I/O completion status
1856 */
1857static void
1858rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status)
1859{
1860  req->status = status;
1861
1862  rtems_event_transient_send (req->io_task);
1863}
1864
1865static rtems_status_code
1866rtems_bdbuf_execute_transfer_request (rtems_disk_device    *dd,
1867                                      rtems_blkdev_request *req,
1868                                      bool                  cache_locked)
1869{
1870  rtems_status_code sc = RTEMS_SUCCESSFUL;
1871  uint32_t transfer_index = 0;
1872  bool wake_transfer_waiters = false;
1873  bool wake_buffer_waiters = false;
1874
1875  if (cache_locked)
1876    rtems_bdbuf_unlock_cache ();
1877
1878  /* The return value will be ignored for transfer requests */
1879  dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);
1880
1881  /* Wait for transfer request completion */
1882  rtems_bdbuf_wait_for_transient_event ();
1883  sc = req->status;
1884
1885  rtems_bdbuf_lock_cache ();
1886
1887  /* Statistics */
1888  if (req->req == RTEMS_BLKDEV_REQ_READ)
1889  {
1890    dd->stats.read_blocks += req->bufnum;
1891    if (sc != RTEMS_SUCCESSFUL)
1892      ++dd->stats.read_errors;
1893  }
1894  else
1895  {
1896    dd->stats.write_blocks += req->bufnum;
1897    ++dd->stats.write_transfers;
1898    if (sc != RTEMS_SUCCESSFUL)
1899      ++dd->stats.write_errors;
1900  }
1901
1902  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
1903  {
1904    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
1905    bool waiters = bd->waiters;
1906
1907    if (waiters)
1908      wake_transfer_waiters = true;
1909    else
1910      wake_buffer_waiters = true;
1911
1912    rtems_bdbuf_group_release (bd);
1913
1914    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
1915      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
1916    else
1917      rtems_bdbuf_discard_buffer (bd);
1918
1919    if (rtems_bdbuf_tracer)
1920      rtems_bdbuf_show_users ("transfer", bd);
1921  }
1922
1923  if (wake_transfer_waiters)
1924    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
1925
1926  if (wake_buffer_waiters)
1927    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1928
1929  if (!cache_locked)
1930    rtems_bdbuf_unlock_cache ();
1931
1932  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
1933    return sc;
1934  else
1935    return RTEMS_IO_ERROR;
1936}
1937
1938static rtems_status_code
1939rtems_bdbuf_execute_read_request (rtems_disk_device  *dd,
1940                                  rtems_bdbuf_buffer *bd,
1941                                  uint32_t            transfer_count)
1942{
1943  rtems_blkdev_request *req = NULL;
1944  rtems_blkdev_bnum media_block = bd->block;
1945  uint32_t media_blocks_per_block = dd->media_blocks_per_block;
1946  uint32_t block_size = dd->block_size;
1947  uint32_t transfer_index = 1;
1948
1949  /*
1950   * TODO: This type of request structure is wrong and should be removed.
1951   */
1952#define bdbuf_alloc(size) __builtin_alloca (size)
1953
1954  req = bdbuf_alloc (rtems_bdbuf_read_request_size (transfer_count));
1955
1956  req->req = RTEMS_BLKDEV_REQ_READ;
1957  req->done = rtems_bdbuf_transfer_done;
1958  req->io_task = rtems_task_self ();
1959  req->bufnum = 0;
1960
1961  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
1962
1963  req->bufs [0].user   = bd;
1964  req->bufs [0].block  = media_block;
1965  req->bufs [0].length = block_size;
1966  req->bufs [0].buffer = bd->buffer;
1967
1968  if (rtems_bdbuf_tracer)
1969    rtems_bdbuf_show_users ("read", bd);
1970
1971  while (transfer_index < transfer_count)
1972  {
1973    media_block += media_blocks_per_block;
1974
1975    bd = rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);
1976
1977    if (bd == NULL)
1978      break;
1979
1980    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
1981
1982    req->bufs [transfer_index].user   = bd;
1983    req->bufs [transfer_index].block  = media_block;
1984    req->bufs [transfer_index].length = block_size;
1985    req->bufs [transfer_index].buffer = bd->buffer;
1986
1987    if (rtems_bdbuf_tracer)
1988      rtems_bdbuf_show_users ("read", bd);
1989
1990    ++transfer_index;
1991  }
1992
1993  req->bufnum = transfer_index;
1994
1995  return rtems_bdbuf_execute_transfer_request (dd, req, true);
1996}
1997
1998static bool
1999rtems_bdbuf_is_read_ahead_active (const rtems_disk_device *dd)
2000{
2001  return !rtems_chain_is_node_off_chain (&dd->read_ahead.node);
2002}
2003
2004static void
2005rtems_bdbuf_read_ahead_cancel (rtems_disk_device *dd)
2006{
2007  if (rtems_bdbuf_is_read_ahead_active (dd))
2008  {
2009    rtems_chain_extract_unprotected (&dd->read_ahead.node);
2010    rtems_chain_set_off_chain (&dd->read_ahead.node);
2011  }
2012}
2013
2014static void
2015rtems_bdbuf_read_ahead_reset (rtems_disk_device *dd)
2016{
2017  rtems_bdbuf_read_ahead_cancel (dd);
2018  dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
2019}
2020
2021static void
2022rtems_bdbuf_check_read_ahead_trigger (rtems_disk_device *dd,
2023                                      rtems_blkdev_bnum  block)
2024{
2025  if (bdbuf_cache.read_ahead_task != 0
2026      && dd->read_ahead.trigger == block
2027      && !rtems_bdbuf_is_read_ahead_active (dd))
2028  {
2029    rtems_status_code sc;
2030    rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;
2031
2032    if (rtems_chain_is_empty (chain))
2033    {
2034      sc = rtems_event_send (bdbuf_cache.read_ahead_task,
2035                             RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
2036      if (sc != RTEMS_SUCCESSFUL)
2037        rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RA_WAKE_UP);
2038    }
2039
2040    rtems_chain_append_unprotected (chain, &dd->read_ahead.node);
2041  }
2042}
2043
2044static void
2045rtems_bdbuf_set_read_ahead_trigger (rtems_disk_device *dd,
2046                                    rtems_blkdev_bnum  block)
2047{
2048  if (dd->read_ahead.trigger != block)
2049  {
2050    rtems_bdbuf_read_ahead_cancel (dd);
2051    dd->read_ahead.trigger = block + 1;
2052    dd->read_ahead.next = block + 2;
2053  }
2054}
2055
2056rtems_status_code
2057rtems_bdbuf_read (rtems_disk_device   *dd,
2058                  rtems_blkdev_bnum    block,
2059                  rtems_bdbuf_buffer **bd_ptr)
2060{
2061  rtems_status_code     sc = RTEMS_SUCCESSFUL;
2062  rtems_bdbuf_buffer   *bd = NULL;
2063  rtems_blkdev_bnum     media_block;
2064
2065  rtems_bdbuf_lock_cache ();
2066
2067  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
2068  if (sc == RTEMS_SUCCESSFUL)
2069  {
2070    if (rtems_bdbuf_tracer)
2071      printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
2072              media_block, block, (unsigned) dd->dev);
2073
2074    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);
2075    switch (bd->state)
2076    {
2077      case RTEMS_BDBUF_STATE_CACHED:
2078        ++dd->stats.read_hits;
2079        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
2080        break;
2081      case RTEMS_BDBUF_STATE_MODIFIED:
2082        ++dd->stats.read_hits;
2083        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
2084        break;
2085      case RTEMS_BDBUF_STATE_EMPTY:
2086        ++dd->stats.read_misses;
2087        rtems_bdbuf_set_read_ahead_trigger (dd, block);
2088        sc = rtems_bdbuf_execute_read_request (dd, bd, 1);
2089        if (sc == RTEMS_SUCCESSFUL)
2090        {
2091          rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
2092          rtems_chain_extract_unprotected (&bd->link);
2093          rtems_bdbuf_group_obtain (bd);
2094        }
2095        else
2096        {
2097          bd = NULL;
2098        }
2099        break;
2100      default:
2101        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_4);
2102        break;
2103    }
2104
2105    rtems_bdbuf_check_read_ahead_trigger (dd, block);
2106  }
2107
2108  rtems_bdbuf_unlock_cache ();
2109
2110  *bd_ptr = bd;
2111
2112  return sc;
2113}
2114
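/*
 * A minimal usage sketch of the read path above, assuming a valid
 * rtems_disk_device pointer and block number from a caller such as a file
 * system. Wrapped in #if 0 so it is illustration only.
 */
#if 0
static rtems_status_code
example_update_block (rtems_disk_device *dd, rtems_blkdev_bnum block)
{
  rtems_bdbuf_buffer *bd;
  rtems_status_code   sc;

  /* Obtain the buffer; the cache reads it from the media on a miss. */
  sc = rtems_bdbuf_read (dd, block, &bd);
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  /* The buffer is now in an ACCESS_* state and owned by this task. */
  memset (bd->buffer, 0, dd->block_size);

  /* Mark it modified; the swapout task writes it back later. */
  return rtems_bdbuf_release_modified (bd);
}
#endif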
2115static rtems_status_code
2116rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
2117{
2118  if (bd == NULL)
2119    return RTEMS_INVALID_ADDRESS;
2120  if (rtems_bdbuf_tracer)
2121  {
2122    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
2123    rtems_bdbuf_show_users (kind, bd);
2124  }
2125  rtems_bdbuf_lock_cache();
2126
2127  return RTEMS_SUCCESSFUL;
2128}
2129
2130rtems_status_code
2131rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2132{
2133  rtems_status_code sc = RTEMS_SUCCESSFUL;
2134
2135  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2136  if (sc != RTEMS_SUCCESSFUL)
2137    return sc;
2138
2139  switch (bd->state)
2140  {
2141    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2142      rtems_bdbuf_add_to_lru_list_after_access (bd);
2143      break;
2144    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2145    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2146      rtems_bdbuf_discard_buffer_after_access (bd);
2147      break;
2148    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2149      rtems_bdbuf_add_to_modified_list_after_access (bd);
2150      break;
2151    default:
2152      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_0);
2153      break;
2154  }
2155
2156  if (rtems_bdbuf_tracer)
2157    rtems_bdbuf_show_usage ();
2158
2159  rtems_bdbuf_unlock_cache ();
2160
2161  return RTEMS_SUCCESSFUL;
2162}
2163
2164rtems_status_code
2165rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2166{
2167  rtems_status_code sc = RTEMS_SUCCESSFUL;
2168
2169  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2170  if (sc != RTEMS_SUCCESSFUL)
2171    return sc;
2172
2173  switch (bd->state)
2174  {
2175    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2176    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2177    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2178      rtems_bdbuf_add_to_modified_list_after_access (bd);
2179      break;
2180    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2181      rtems_bdbuf_discard_buffer_after_access (bd);
2182      break;
2183    default:
2184      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_6);
2185      break;
2186  }
2187
2188  if (rtems_bdbuf_tracer)
2189    rtems_bdbuf_show_usage ();
2190
2191  rtems_bdbuf_unlock_cache ();
2192
2193  return RTEMS_SUCCESSFUL;
2194}
2195
2196rtems_status_code
2197rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
2198{
2199  rtems_status_code sc = RTEMS_SUCCESSFUL;
2200
2201  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2202  if (sc != RTEMS_SUCCESSFUL)
2203    return sc;
2204
2205  switch (bd->state)
2206  {
2207    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2208    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2209    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2210      rtems_bdbuf_sync_after_access (bd);
2211      break;
2212    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2213      rtems_bdbuf_discard_buffer_after_access (bd);
2214      break;
2215    default:
2216      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_5);
2217      break;
2218  }
2219
2220  if (rtems_bdbuf_tracer)
2221    rtems_bdbuf_show_usage ();
2222
2223  rtems_bdbuf_unlock_cache ();
2224
2225  return RTEMS_SUCCESSFUL;
2226}
2227
2228rtems_status_code
2229rtems_bdbuf_syncdev (rtems_disk_device *dd)
2230{
2231  if (rtems_bdbuf_tracer)
2232    printf ("bdbuf:syncdev: %08x\n", (unsigned) dd->dev);
2233
2234  /*
2235   * Take the sync lock before locking the cache. Once we have the sync lock
2236   * we can lock the cache. If another thread has the sync lock this thread
2237   * blocks until it owns the sync lock, and only then can it lock the cache.
2238   * The sync lock can only be obtained with the cache unlocked.
2239   */
2240  rtems_bdbuf_lock_sync ();
2241  rtems_bdbuf_lock_cache ();
2242
2243  /*
2244   * Set the cache to have a sync active for a specific device and let the swap
2245   * out task know the id of the requester to wake when done.
2246   *
2247   * The swap out task will negate the sync active flag when no more buffers
2248   * for the device are held on the "modified for sync" queues.
2249   */
2250  bdbuf_cache.sync_active    = true;
2251  bdbuf_cache.sync_requester = rtems_task_self ();
2252  bdbuf_cache.sync_device    = dd;
2253
2254  rtems_bdbuf_wake_swapper ();
2255  rtems_bdbuf_unlock_cache ();
2256  rtems_bdbuf_wait_for_transient_event ();
2257  rtems_bdbuf_unlock_sync ();
2258
2259  return RTEMS_SUCCESSFUL;
2260}
2261
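/*
 * A minimal usage sketch of the sync path above, assuming a valid
 * rtems_disk_device pointer; for example before a device is unmounted or
 * detached. Wrapped in #if 0 so it is illustration only.
 */
#if 0
static void
example_flush_device (rtems_disk_device *dd)
{
  /*
   * Blocks until the swapout task has written out all buffers currently
   * modified for this device.
   */
  (void) rtems_bdbuf_syncdev (dd);
}
#endif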
2262/**
2263 * Swapout transfer to the driver. The driver will break this I/O into groups
2264 * of consecutive write requests if multiple consecutive buffers are required
2265 * by the driver. The cache is not locked.
2266 *
2267 * @param transfer The transfer transaction.
2268 */
2269static void
2270rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2271{
2272  rtems_chain_node *node;
2273
2274  if (rtems_bdbuf_tracer)
2275    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dd->dev);
2276
2277  /*
2278   * If there are buffers to transfer to the media, transfer them.
2279   */
2280  if (!rtems_chain_is_empty (&transfer->bds))
2281  {
2282    /*
2283     * The last block number used when the driver only supports
2284     * continuous blocks in a single request.
2285     */
2286    uint32_t last_block = 0;
2287
2288    rtems_disk_device *dd = transfer->dd;
2289    uint32_t media_blocks_per_block = dd->media_blocks_per_block;
2290    bool need_continuous_blocks =
2291      (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) != 0;
2292
2293    /*
2294     * Take as many buffers as configured and pass them to the driver. Note,
2295     * the API to the drivers takes an array of buffers; if it took a chain
2296     * we could have just passed the list. If the driver API is updated it
2297     * should be possible to make this change with little effect in this
2298     * code. The array that is passed is broken in design and should be
2299     * removed. Merging members of a struct into the first member is
2300     * trouble waiting to happen.
2301     */
2302    transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
2303    transfer->write_req.bufnum = 0;
2304
2305    while ((node = rtems_chain_get_unprotected(&transfer->bds)) != NULL)
2306    {
2307      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2308      bool                write = false;
2309
2310      /*
2311       * If the device only accepts sequential buffers and this is not the
2312       * first buffer (the first is always sequential) and the buffer is not
2313       * sequential, then put the buffer back on the transfer chain and write
2314       * the committed buffers.
2315       */
2316
2317      if (rtems_bdbuf_tracer)
2318        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2319                bd->block, transfer->write_req.bufnum,
2320                need_continuous_blocks ? "MULTI" : "SCAT");
2321
2322      if (need_continuous_blocks && transfer->write_req.bufnum &&
2323          bd->block != last_block + media_blocks_per_block)
2324      {
2325        rtems_chain_prepend_unprotected (&transfer->bds, &bd->link);
2326        write = true;
2327      }
2328      else
2329      {
2330        rtems_blkdev_sg_buffer* buf;
2331        buf = &transfer->write_req.bufs[transfer->write_req.bufnum];
2332        transfer->write_req.bufnum++;
2333        buf->user   = bd;
2334        buf->block  = bd->block;
2335        buf->length = dd->block_size;
2336        buf->buffer = bd->buffer;
2337        last_block  = bd->block;
2338      }
2339
2340      /*
2341       * Perform the transfer if there are no more buffers, or the transfer
2342       * size has reached the configured max. value.
2343       */
2344
2345      if (rtems_chain_is_empty (&transfer->bds) ||
2346          (transfer->write_req.bufnum >= bdbuf_config.max_write_blocks))
2347        write = true;
2348
2349      if (write)
2350      {
2351        rtems_bdbuf_execute_transfer_request (dd, &transfer->write_req, false);
2352
2353        transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
2354        transfer->write_req.bufnum = 0;
2355      }
2356    }
2357
2358    /*
2359     * If sync'ing and the device is capable of handling a sync IO control
2360     * call, perform the call.
2361     */
2362    if (transfer->syncing &&
2363        (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
2364    {
2365      /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
2366      /* How should the error be handled ? */
2367    }
2368  }
2369}
2370
2371/**
2372 * Process the modified list of buffers. There is a sync or modified list that
2373 * needs to be handled so we have a common function to do the work.
2374 *
2375 * @param dd_ptr Pointer to the device to handle. If BDBUF_INVALID_DEV no
2376 * device is selected, so the device of the first buffer to be written to
2377 * disk is selected.
2378 * @param chain The modified chain to process.
2379 * @param transfer The chain to which buffers to be written are appended.
2380 * @param sync_active If true this is a sync operation so expire all timers.
2381 * @param update_timers If true update the timers.
2382 * @param timer_delta If update_timers is true update the timers by this
2383 *                    amount.
2384 */
2385static void
2386rtems_bdbuf_swapout_modified_processing (rtems_disk_device  **dd_ptr,
2387                                         rtems_chain_control* chain,
2388                                         rtems_chain_control* transfer,
2389                                         bool                 sync_active,
2390                                         bool                 update_timers,
2391                                         uint32_t             timer_delta)
2392{
2393  if (!rtems_chain_is_empty (chain))
2394  {
2395    rtems_chain_node* node = rtems_chain_head (chain);
2396    bool              sync_all;
2397
2398    node = node->next;
2399
2400    /*
2401     * A sync active with no valid dev means sync all.
2402     */
2403    if (sync_active && (*dd_ptr == BDBUF_INVALID_DEV))
2404      sync_all = true;
2405    else
2406      sync_all = false;
2407
2408    while (!rtems_chain_is_tail (chain, node))
2409    {
2410      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2411
2412      /*
2413       * Check if the buffer's hold timer has reached 0. If a sync is active,
2414       * or someone is waiting for a buffer to be written, force all the timers to 0.
2415       *
2416       * @note Lots of sync requests will skew this timer. It should be based
2417       *       on TOD to be accurate. Does it matter ?
2418       */
2419      if (sync_all || (sync_active && (*dd_ptr == bd->dd))
2420          || rtems_bdbuf_has_buffer_waiters ())
2421        bd->hold_timer = 0;
2422
2423      if (bd->hold_timer)
2424      {
2425        if (update_timers)
2426        {
2427          if (bd->hold_timer > timer_delta)
2428            bd->hold_timer -= timer_delta;
2429          else
2430            bd->hold_timer = 0;
2431        }
2432
2433        if (bd->hold_timer)
2434        {
2435          node = node->next;
2436          continue;
2437        }
2438      }
2439
2440      /*
2441       * This assumes we can set it to BDBUF_INVALID_DEV, which is just an
2442       * assumption. We cannot use an empty transfer list as the test because
2443       * the sync dev call sets the dev to use.
2444       */
2445      if (*dd_ptr == BDBUF_INVALID_DEV)
2446        *dd_ptr = bd->dd;
2447
2448      if (bd->dd == *dd_ptr)
2449      {
2450        rtems_chain_node* next_node = node->next;
2451        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2452
2453        /*
2454         * The blocks on the transfer list are sorted in block order. This
2455         * means multi-block transfers for drivers that require consecutive
2456         * blocks perform better with sorted blocks and for real disks it may
2457         * help lower head movement.
2458         */
2459
2460        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2461
2462        rtems_chain_extract_unprotected (node);
2463
2464        tnode = tnode->previous;
2465
2466        while (node && !rtems_chain_is_head (transfer, tnode))
2467        {
2468          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2469
2470          if (bd->block > tbd->block)
2471          {
2472            rtems_chain_insert_unprotected (tnode, node);
2473            node = NULL;
2474          }
2475          else
2476            tnode = tnode->previous;
2477        }
2478
2479        if (node)
2480          rtems_chain_prepend_unprotected (transfer, node);
2481
2482        node = next_node;
2483      }
2484      else
2485      {
2486        node = node->next;
2487      }
2488    }
2489  }
2490}
2491
2492/**
2493 * Process the cache's modified buffers. Check the sync list first then the
2494 * modified list, extracting the buffers suitable to be written to disk. We
2495 * handle one device at a time. The task level loop will repeat this operation
2496 * while there are buffers to be written. If the transfer fails place the
2497 * buffers back on the modified list and try again later. The cache is
2498 * unlocked while the buffers are being written to disk.
2499 *
2500 * @param timer_delta If update_timers is true update the timers by this
2501 *                    amount.
2502 * @param update_timers If true update the timers.
2503 * @param transfer The transfer transaction data.
2504 *
2505 * @retval true Buffers were written to disk so scan again.
2506 * @retval false No buffers were written to disk.
2507 */
2508static bool
2509rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2510                                bool                          update_timers,
2511                                rtems_bdbuf_swapout_transfer* transfer)
2512{
2513  rtems_bdbuf_swapout_worker* worker;
2514  bool                        transfered_buffers = false;
2515  bool                        sync_active;
2516
2517  rtems_bdbuf_lock_cache ();
2518
2519  /*
2520   * To set this to true you need the cache and the sync lock.
2521   */
2522  sync_active = bdbuf_cache.sync_active;
2523
2524  /*
2525   * If a sync is active do not use a worker because the current code does not
2526   * clean up after it. We need to know the buffers have been written when
2527   * syncing to release the sync lock and currently worker threads do not
2528   * return to here. We do not know the worker is the last in a sequence of
2529   * sync writes until after we have it running so we do not know to tell it
2530   * to release the lock. The simplest solution is to have the main swap out
2531   * task perform all sync operations.
2532   */
2533  if (sync_active)
2534    worker = NULL;
2535  else
2536  {
2537    worker = (rtems_bdbuf_swapout_worker*)
2538      rtems_chain_get_unprotected (&bdbuf_cache.swapout_free_workers);
2539    if (worker)
2540      transfer = &worker->transfer;
2541  }
2542
2543  rtems_chain_initialize_empty (&transfer->bds);
2544  transfer->dd = BDBUF_INVALID_DEV;
2545  transfer->syncing = sync_active;
2546
2547  /*
2548   * When the sync is for a device limit the sync to that device. If the sync
2549   * is for a buffer handle, process the devices in the order they appear on
2550   * the sync list. This means the dev is BDBUF_INVALID_DEV.
2551   */
2552  if (sync_active)
2553    transfer->dd = bdbuf_cache.sync_device;
2554
2555  /*
2556   * If we have any buffers in the sync queue move them to the transfer
2557   * list. The first sync buffer will select the device we use.
2558   */
2559  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2560                                           &bdbuf_cache.sync,
2561                                           &transfer->bds,
2562                                           true, false,
2563                                           timer_delta);
2564
2565  /*
2566   * Process the cache's modified list.
2567   */
2568  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2569                                           &bdbuf_cache.modified,
2570                                           &transfer->bds,
2571                                           sync_active,
2572                                           update_timers,
2573                                           timer_delta);
2574
2575  /*
2576   * We have all the buffers that have been modified for this device so the
2577   * cache can be unlocked because the state of each buffer has been set to
2578   * TRANSFER.
2579   */
2580  rtems_bdbuf_unlock_cache ();
2581
2582  /*
2583   * If there are buffers to transfer to the media, transfer them.
2584   */
2585  if (!rtems_chain_is_empty (&transfer->bds))
2586  {
2587    if (worker)
2588    {
2589      rtems_status_code sc = rtems_event_send (worker->id,
2590                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2591      if (sc != RTEMS_SUCCESSFUL)
2592        rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_2);
2593    }
2594    else
2595    {
2596      rtems_bdbuf_swapout_write (transfer);
2597    }
2598
2599    transfered_buffers = true;
2600  }
2601
2602  if (sync_active && !transfered_buffers)
2603  {
2604    rtems_id sync_requester;
2605    rtems_bdbuf_lock_cache ();
2606    sync_requester = bdbuf_cache.sync_requester;
2607    bdbuf_cache.sync_active = false;
2608    bdbuf_cache.sync_requester = 0;
2609    rtems_bdbuf_unlock_cache ();
2610    if (sync_requester)
2611      rtems_event_transient_send (sync_requester);
2612  }
2613
2614  return transfered_buffers;
2615}
2616
2617/**
2618 * The swapout worker thread body.
2619 *
2620 * @param arg A pointer to the worker thread's private data.
2621 * @return rtems_task Not used.
2622 */
2623static rtems_task
2624rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2625{
2626  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2627
2628  while (worker->enabled)
2629  {
2630    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2631
2632    rtems_bdbuf_swapout_write (&worker->transfer);
2633
2634    rtems_bdbuf_lock_cache ();
2635
2636    rtems_chain_initialize_empty (&worker->transfer.bds);
2637    worker->transfer.dd = BDBUF_INVALID_DEV;
2638
2639    rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);
2640
2641    rtems_bdbuf_unlock_cache ();
2642  }
2643
2644  free (worker);
2645
2646  rtems_task_exit();
2647}
2648
2649/**
2650 * Close the swapout worker threads.
2651 */
2652static void
2653rtems_bdbuf_swapout_workers_close (void)
2654{
2655  rtems_chain_node* node;
2656
2657  rtems_bdbuf_lock_cache ();
2658
2659  node = rtems_chain_first (&bdbuf_cache.swapout_free_workers);
2660  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_free_workers, node))
2661  {
2662    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2663    worker->enabled = false;
2664    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2665    node = rtems_chain_next (node);
2666  }
2667
2668  rtems_bdbuf_unlock_cache ();
2669}
2670
2671/**
2672 * Body of task which takes care of flushing modified buffers to the disk.
2673 *
2674 * @param arg A pointer to the swapout transfer data used by this task; it is
2675 *            freed when the task exits.
2676 * @return rtems_task Not used.
2677 */
2678static rtems_task
2679rtems_bdbuf_swapout_task (rtems_task_argument arg)
2680{
2681  rtems_bdbuf_swapout_transfer* transfer = (rtems_bdbuf_swapout_transfer *) arg;
2682  uint32_t                      period_in_ticks;
2683  const uint32_t                period_in_msecs = bdbuf_config.swapout_period;
2684  uint32_t                      timer_delta;
2685
2686  /*
2687   * Localise the period.
2688   */
2689  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2690
2691  /*
2692   * This is temporary. Needs to be changed to use the real time clock.
2693   */
2694  timer_delta = period_in_msecs;
2695
2696  while (bdbuf_cache.swapout_enabled)
2697  {
2698    rtems_event_set   out;
2699    rtems_status_code sc;
2700
2701    /*
2702     * Only update the timers once in the processing cycle.
2703     */
2704    bool update_timers = true;
2705
2706    /*
2707     * If we write buffers to any disk, perform the check again. We only write
2708     * a single device at a time and the cache may have more than one device's
2709     * buffers modified waiting to be written.
2710     */
2711    bool transfered_buffers;
2712
2713    do
2714    {
2715      transfered_buffers = false;
2716
2717      /*
2718       * Extract all the buffers we find for a specific device. The device is
2719       * the first one we find on a modified list. Process the sync queue of
2720       * buffers first.
2721       */
2722      if (rtems_bdbuf_swapout_processing (timer_delta,
2723                                          update_timers,
2724                                          transfer))
2725      {
2726        transfered_buffers = true;
2727      }
2728
2729      /*
2730       * Only update the timers once.
2731       */
2732      update_timers = false;
2733    }
2734    while (transfered_buffers);
2735
2736    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2737                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2738                              period_in_ticks,
2739                              &out);
2740
2741    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2742      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SWAPOUT_RE);
2743  }
2744
2745  rtems_bdbuf_swapout_workers_close ();
2746
2747  free (transfer);
2748
2749  rtems_task_exit();
2750}
2751
2752static void
2753rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2754{
2755  bool wake_buffer_waiters = false;
2756  rtems_chain_node *node = NULL;
2757
2758  while ((node = rtems_chain_get_unprotected (purge_list)) != NULL)
2759  {
2760    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
2761
2762    if (bd->waiters == 0)
2763      wake_buffer_waiters = true;
2764
2765    rtems_bdbuf_discard_buffer (bd);
2766  }
2767
2768  if (wake_buffer_waiters)
2769    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2770}
2771
2772static void
2773rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
2774                              const rtems_disk_device *dd)
2775{
2776  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
2777  rtems_bdbuf_buffer **prev = stack;
2778  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;
2779
2780  *prev = NULL;
2781
2782  while (cur != NULL)
2783  {
2784    if (cur->dd == dd)
2785    {
2786      switch (cur->state)
2787      {
2788        case RTEMS_BDBUF_STATE_FREE:
2789        case RTEMS_BDBUF_STATE_EMPTY:
2790        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2791        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
2792          break;
2793        case RTEMS_BDBUF_STATE_SYNC:
2794          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
2795          /* Fall through */
2796        case RTEMS_BDBUF_STATE_MODIFIED:
2797          rtems_bdbuf_group_release (cur);
2798          /* Fall through */
2799        case RTEMS_BDBUF_STATE_CACHED:
2800          rtems_chain_extract_unprotected (&cur->link);
2801          rtems_chain_append_unprotected (purge_list, &cur->link);
2802          break;
2803        case RTEMS_BDBUF_STATE_TRANSFER:
2804          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
2805          break;
2806        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2807        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2808        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2809          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
2810          break;
2811        default:
2812          rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_STATE_11);
2813      }
2814    }
2815
2816    if (cur->avl.left != NULL)
2817    {
2818      /* Left */
2819      ++prev;
2820      *prev = cur;
2821      cur = cur->avl.left;
2822    }
2823    else if (cur->avl.right != NULL)
2824    {
2825      /* Right */
2826      ++prev;
2827      *prev = cur;
2828      cur = cur->avl.right;
2829    }
2830    else
2831    {
2832      while (*prev != NULL
2833             && (cur == (*prev)->avl.right || (*prev)->avl.right == NULL))
2834      {
2835        /* Up */
2836        cur = *prev;
2837        --prev;
2838      }
2839      if (*prev != NULL)
2840        /* Right */
2841        cur = (*prev)->avl.right;
2842      else
2843        /* Finished */
2844        cur = NULL;
2845    }
2846  }
2847}
2848
2849static void
2850rtems_bdbuf_do_purge_dev (rtems_disk_device *dd)
2851{
2852  rtems_chain_control purge_list;
2853
2854  rtems_chain_initialize_empty (&purge_list);
2855  rtems_bdbuf_read_ahead_reset (dd);
2856  rtems_bdbuf_gather_for_purge (&purge_list, dd);
2857  rtems_bdbuf_purge_list (&purge_list);
2858}
2859
2860void
2861rtems_bdbuf_purge_dev (rtems_disk_device *dd)
2862{
2863  rtems_bdbuf_lock_cache ();
2864  rtems_bdbuf_do_purge_dev (dd);
2865  rtems_bdbuf_unlock_cache ();
2866}
2867
2868rtems_status_code
2869rtems_bdbuf_set_block_size (rtems_disk_device *dd,
2870                            uint32_t           block_size,
2871                            bool               sync)
2872{
2873  rtems_status_code sc = RTEMS_SUCCESSFUL;
2874
2875  /*
2876   * We do not care about the synchronization status since we will purge the
2877   * device later.
2878   */
2879  if (sync)
2880    rtems_bdbuf_syncdev (dd);
2881
2882  rtems_bdbuf_lock_cache ();
2883
2884  if (block_size > 0)
2885  {
2886    size_t bds_per_group = rtems_bdbuf_bds_per_group (block_size);
2887
2888    if (bds_per_group != 0)
2889    {
2890      int block_to_media_block_shift = 0;
2891      uint32_t media_blocks_per_block = block_size / dd->media_block_size;
2892      uint32_t one = 1;
2893
2894      while ((one << block_to_media_block_shift) < media_blocks_per_block)
2895      {
2896        ++block_to_media_block_shift;
2897      }
2898
2899      if ((dd->media_block_size << block_to_media_block_shift) != block_size)
2900        block_to_media_block_shift = -1;
2901
2902      dd->block_size = block_size;
2903      dd->block_count = dd->size / media_blocks_per_block;
2904      dd->media_blocks_per_block = media_blocks_per_block;
2905      dd->block_to_media_block_shift = block_to_media_block_shift;
2906      dd->bds_per_group = bds_per_group;
2907
2908      rtems_bdbuf_do_purge_dev (dd);
2909    }
2910    else
2911    {
2912      sc = RTEMS_INVALID_NUMBER;
2913    }
2914  }
2915  else
2916  {
2917    sc = RTEMS_INVALID_NUMBER;
2918  }
2919
2920  rtems_bdbuf_unlock_cache ();
2921
2922  return sc;
2923}
2924
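/*
 * Worked example of the shift computation above (illustrative numbers, not
 * from this file): with a media block size of 512 bytes and a block size of
 * 4096 bytes, media_blocks_per_block is 8 and block_to_media_block_shift
 * becomes 3 (512 << 3 == 4096). With a block size of 1536 bytes the loop
 * stops at a shift of 2, but 512 << 2 != 1536, so the shift is set to -1 to
 * indicate that block numbers cannot be converted by shifting.
 */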
2925static rtems_task
2926rtems_bdbuf_read_ahead_task (rtems_task_argument arg)
2927{
2928  rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;
2929
2930  while (bdbuf_cache.read_ahead_enabled)
2931  {
2932    rtems_chain_node *node;
2933
2934    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
2935    rtems_bdbuf_lock_cache ();
2936
2937    while ((node = rtems_chain_get_unprotected (chain)) != NULL)
2938    {
2939      rtems_disk_device *dd =
2940        RTEMS_CONTAINER_OF (node, rtems_disk_device, read_ahead.node);
2941      rtems_blkdev_bnum block = dd->read_ahead.next;
2942      rtems_blkdev_bnum media_block = 0;
2943      rtems_status_code sc =
2944        rtems_bdbuf_get_media_block (dd, block, &media_block);
2945
2946      rtems_chain_set_off_chain (&dd->read_ahead.node);
2947
2948      if (sc == RTEMS_SUCCESSFUL)
2949      {
2950        rtems_bdbuf_buffer *bd =
2951          rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);
2952
2953        if (bd != NULL)
2954        {
2955          uint32_t transfer_count = dd->block_count - block;
2956          uint32_t max_transfer_count = bdbuf_config.max_read_ahead_blocks;
2957
2958          if (transfer_count >= max_transfer_count)
2959          {
2960            transfer_count = max_transfer_count;
2961            dd->read_ahead.trigger = block + transfer_count / 2;
2962            dd->read_ahead.next = block + transfer_count;
2963          }
2964          else
2965          {
2966            dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
2967          }
2968
2969          ++dd->stats.read_ahead_transfers;
2970          rtems_bdbuf_execute_read_request (dd, bd, transfer_count);
2971        }
2972      }
2973      else
2974      {
2975        dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
2976      }
2977    }
2978
2979    rtems_bdbuf_unlock_cache ();
2980  }
2981
2982  rtems_task_exit();
2983}
2984
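/*
 * Worked example of the read-ahead bookkeeping implemented by
 * rtems_bdbuf_set_read_ahead_trigger(), rtems_bdbuf_check_read_ahead_trigger()
 * and the task above (illustrative numbers, assuming sequential reads from
 * block 10 and max_read_ahead_blocks == 8):
 *
 *   read 10 (miss)    -> trigger = 11, next = 12
 *   read 11 (trigger) -> the task reads 8 blocks starting at 12 and sets
 *                        trigger = 12 + 8 / 2 = 16, next = 12 + 8 = 20
 *   read 16 (trigger) -> the task reads 8 blocks starting at 20, and so on.
 */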
2985void rtems_bdbuf_get_device_stats (const rtems_disk_device *dd,
2986                                   rtems_blkdev_stats      *stats)
2987{
2988  rtems_bdbuf_lock_cache ();
2989  *stats = dd->stats;
2990  rtems_bdbuf_unlock_cache ();
2991}
2992
2993void rtems_bdbuf_reset_device_stats (rtems_disk_device *dd)
2994{
2995  rtems_bdbuf_lock_cache ();
2996  memset (&dd->stats, 0, sizeof(dd->stats));
2997  rtems_bdbuf_unlock_cache ();
2998}
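/*
 * A minimal usage sketch of the statistics interface above, printing a few
 * of the counters updated in this file. The counters are assumed to be
 * uint32_t, matching the PRIu32 format used here. Wrapped in #if 0 so it is
 * illustration only.
 */
#if 0
static void
example_print_device_stats (const rtems_disk_device *dd)
{
  rtems_blkdev_stats stats;

  /* Snapshot the per-device counters under the cache lock. */
  rtems_bdbuf_get_device_stats (dd, &stats);

  printf ("read hits/misses: %" PRIu32 "/%" PRIu32 "\n",
          stats.read_hits, stats.read_misses);
  printf ("blocks read/written: %" PRIu32 "/%" PRIu32 "\n",
          stats.read_blocks, stats.write_blocks);
  printf ("read-ahead transfers: %" PRIu32 "\n", stats.read_ahead_transfers);
}
#endif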