source: rtems/cpukit/libblock/src/bdbuf.c

Last change on this file was b03aba30, checked in by Kinsey Moore <kinsey.moore@…>, on 01/24/24 at 17:57:36

cpukit/libblock: Ignore sync status prior to purge

  • Property mode set to 100644
File size: 80.2 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (C) 2009, 2017 embedded brains GmbH & Co. KG
23 */
24
25/**
26 * Set to 1 to enable debug tracing.
27 */
28#define RTEMS_BDBUF_TRACE 0
29
30#ifdef HAVE_CONFIG_H
31#include "config.h"
32#endif
33#include <limits.h>
34#include <errno.h>
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38#include <inttypes.h>
39#include <pthread.h>
40
41#include <rtems.h>
42#include <rtems/thread.h>
43#include <rtems/score/assert.h>
44
45#include "rtems/bdbuf.h"
46
47#define BDBUF_INVALID_DEV NULL
48
49/*
50 * Simpler label for this file.
51 */
52#define bdbuf_config rtems_bdbuf_configuration
53
/**
 * A swapout transfer transaction data. This data is passed to a worker thread
 * to handle the write phase of the transfer.
 */
typedef struct rtems_bdbuf_swapout_transfer
{
  rtems_chain_control   bds;         /**< The transfer list of BDs. */
  rtems_disk_device    *dd;          /**< The device the transfer is for. */
  bool                  syncing;     /**< True if this transfer is part of a
                                      * sync operation. */
  rtems_blkdev_request  write_req;   /**< The write request. */
} rtems_bdbuf_swapout_transfer;
65
/**
 * Swapout worker thread. These are available to take processing from the
 * main swapout thread and handle the I/O operation.
 */
typedef struct rtems_bdbuf_swapout_worker
{
  rtems_chain_node             link;     /**< The threads sit on a chain when
                                          * idle. */
  rtems_id                     id;       /**< The id of the task so we can wake
                                          * it. */
  bool                         enabled;  /**< The worker is enabled. */
  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
                                          * thread. */
} rtems_bdbuf_swapout_worker;
80
/**
 * Buffer waiters synchronization. Tasks block on the condition variable and
 * the counter lets wakers skip the broadcast when nobody waits.
 */
typedef struct rtems_bdbuf_waiters {
  unsigned                 count;    /**< Number of tasks currently waiting. */
  rtems_condition_variable cond_var; /**< Condition variable the waiters block
                                      * on; paired with the cache lock. */
} rtems_bdbuf_waiters;
88
/**
 * The BD buffer cache. There is a single instance of this structure
 * (bdbuf_cache below); all fields are protected by the cache lock unless
 * noted otherwise.
 */
typedef struct rtems_bdbuf_cache
{
  rtems_id            swapout;           /**< Swapout task ID */
  bool                swapout_enabled;   /**< Swapout is only running if
                                          * enabled. Set to false to kill the
                                          * swap out task. It deletes itself. */
  rtems_chain_control swapout_free_workers; /**< The work threads for the swapout
                                             * task. */

  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
                                          * descriptors. */
  void*               buffers;           /**< The buffer's memory. */
  size_t              buffer_min_count;  /**< Number of minimum size buffers
                                          * that fit the buffer memory. */
  size_t              max_bds_per_group; /**< The number of BDs of minimum
                                          * buffer size that fit in a group. */
  uint32_t            flags;             /**< Configuration flags. */

  rtems_mutex         lock;              /**< The cache lock. It locks all
                                          * cache data, BD and lists. */
  rtems_mutex         sync_lock;         /**< Sync calls block writes. */
  bool                sync_active;       /**< True if a sync is active. */
  rtems_id            sync_requester;    /**< The sync requester. */
  rtems_disk_device  *sync_device;       /**< The device to sync and
                                          * BDBUF_INVALID_DEV not a device
                                          * sync. */

  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
                                          * root. There is only one. */
  rtems_chain_control lru;               /**< Least recently used list */
  rtems_chain_control modified;          /**< Modified buffers list */
  rtems_chain_control sync;              /**< Buffers to sync list */

  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
                                          * ACCESS_CACHED, ACCESS_MODIFIED or
                                          * ACCESS_EMPTY
                                          * state. */
  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
                                          * state. */
  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
                                          * available. */

  rtems_bdbuf_swapout_transfer *swapout_transfer; /**< Transfer state of the
                                                   * swapout task. */
  rtems_bdbuf_swapout_worker *swapout_workers;    /**< Worker table, if worker
                                                   * threads are configured. */

  size_t              group_count;       /**< The number of groups. */
  rtems_bdbuf_group*  groups;            /**< The groups. */
  rtems_id            read_ahead_task;   /**< Read-ahead task */
  rtems_chain_control read_ahead_chain;  /**< Read-ahead request chain */
  bool                read_ahead_enabled; /**< Read-ahead enabled */
  rtems_status_code   init_status;       /**< The initialization status */
  pthread_once_t      once;              /**< Guards one-time initialization
                                          * (pthread_once). */
} rtems_bdbuf_cache;
145
/**
 * Fatal error codes for this module, reported via rtems_bdbuf_fatal() with
 * the RTEMS_FATAL_SOURCE_BDBUF source. The STATE_* codes are combined with
 * the buffer state by rtems_bdbuf_fatal_with_state().
 */
typedef enum {
  RTEMS_BDBUF_FATAL_CACHE_WAIT_2,
  RTEMS_BDBUF_FATAL_CACHE_WAIT_TO,
  RTEMS_BDBUF_FATAL_CACHE_WAKE,
  RTEMS_BDBUF_FATAL_PREEMPT_DIS,
  RTEMS_BDBUF_FATAL_PREEMPT_RST,
  RTEMS_BDBUF_FATAL_RA_WAKE_UP,
  RTEMS_BDBUF_FATAL_RECYCLE,
  RTEMS_BDBUF_FATAL_SO_WAKE_1,
  RTEMS_BDBUF_FATAL_SO_WAKE_2,
  RTEMS_BDBUF_FATAL_STATE_0,
  RTEMS_BDBUF_FATAL_STATE_2,
  RTEMS_BDBUF_FATAL_STATE_4,
  RTEMS_BDBUF_FATAL_STATE_5,
  RTEMS_BDBUF_FATAL_STATE_6,
  RTEMS_BDBUF_FATAL_STATE_7,
  RTEMS_BDBUF_FATAL_STATE_8,
  RTEMS_BDBUF_FATAL_STATE_9,
  RTEMS_BDBUF_FATAL_STATE_10,
  RTEMS_BDBUF_FATAL_STATE_11,
  RTEMS_BDBUF_FATAL_SWAPOUT_RE,
  RTEMS_BDBUF_FATAL_TREE_RM,
  RTEMS_BDBUF_FATAL_WAIT_EVNT,
  RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT
} rtems_bdbuf_fatal_code;
171
172/**
173 * The events used in this code. These should be system events rather than
174 * application events.
175 */
176#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
177#define RTEMS_BDBUF_READ_AHEAD_WAKE_UP RTEMS_EVENT_1
178
179static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
180
181static rtems_task rtems_bdbuf_read_ahead_task(rtems_task_argument arg);
182
/**
 * The Buffer Descriptor cache. Only the synchronization primitives and the
 * once control are statically initialized; the remaining members are
 * zero-initialized here and set up during one-time initialization.
 */
static rtems_bdbuf_cache bdbuf_cache = {
  .lock = RTEMS_MUTEX_INITIALIZER(NULL),
  .sync_lock = RTEMS_MUTEX_INITIALIZER(NULL),
  .access_waiters = { .cond_var = RTEMS_CONDITION_VARIABLE_INITIALIZER(NULL) },
  .transfer_waiters = {
    .cond_var = RTEMS_CONDITION_VARIABLE_INITIALIZER(NULL)
  },
  .buffer_waiters = { .cond_var = RTEMS_CONDITION_VARIABLE_INITIALIZER(NULL) },
  .once = PTHREAD_ONCE_INIT
};
196
197#if RTEMS_BDBUF_TRACE
198/**
199 * If true output the trace message.
200 */
201bool rtems_bdbuf_tracer;
202
203/**
204 * Return the number of items on the list.
205 *
206 * @param list The chain control.
207 * @return uint32_t The number of items on the list.
208 */
209uint32_t
210rtems_bdbuf_list_count (rtems_chain_control* list)
211{
212  rtems_chain_node* node = rtems_chain_first (list);
213  uint32_t          count = 0;
214  while (!rtems_chain_is_tail (list, node))
215  {
216    count++;
217    node = rtems_chain_next (node);
218  }
219  return count;
220}
221
222/**
223 * Show the usage for the bdbuf cache.
224 */
225void
226rtems_bdbuf_show_usage (void)
227{
228  uint32_t group;
229  uint32_t total = 0;
230  uint32_t val;
231
232  for (group = 0; group < bdbuf_cache.group_count; group++)
233    total += bdbuf_cache.groups[group].users;
234  printf ("bdbuf:group users=%lu", total);
235  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
236  printf (", lru=%lu", val);
237  total = val;
238  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
239  printf (", mod=%lu", val);
240  total += val;
241  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
242  printf (", sync=%lu", val);
243  total += val;
244  printf (", total=%lu\n", total);
245}
246
/**
 * Show the users for a group of a bd.
 *
 * @param where A label to show the context of output.
 * @param bd The bd to show the users of.
 */
void
rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
{
  /*
   * Two-letter tags indexed by bd->state; the order must mirror the
   * rtems_bdbuf_buf_state enum — TODO(review) confirm against bdbuf.h.
   */
  const char* states[] =
    { "FR", "EM", "CH", "AC", "AM", "AE", "AP", "MD", "SY", "TR", "TP" };

  /* Group index and BD index are derived by pointer arithmetic from the
   * cache tables; ">8 users" is flagged as suspicious in the trace. */
  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
          where,
          bd->block, states[bd->state],
          bd->group - bdbuf_cache.groups,
          bd - bdbuf_cache.bds,
          bd->group->users,
          bd->group->users > 8 ? "<<<<<<<" : "");
}
267#else
268#define rtems_bdbuf_tracer (0)
269#define rtems_bdbuf_show_usage() ((void) 0)
270#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
271#endif
272
273/**
274 * The default maximum height of 32 allows for AVL trees having between
275 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
276 * change this compile-time constant as you wish.
277 */
278#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
279#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
280#endif
281
/**
 * Terminate the system with a bdbuf-specific fatal error code.
 */
static void
rtems_bdbuf_fatal (rtems_fatal_code error)
{
  rtems_fatal (RTEMS_FATAL_SOURCE_BDBUF, error);
}

/**
 * Fatal error that also records the buffer state: the state is packed into
 * the upper 16 bits of the fatal code, the error code into the lower bits.
 */
static void
rtems_bdbuf_fatal_with_state (rtems_bdbuf_buf_state state,
                              rtems_bdbuf_fatal_code error)
{
  rtems_bdbuf_fatal ((((uint32_t) state) << 16) | error);
}
294
295/**
296 * Searches for the node with specified dd/block.
297 *
298 * @param root pointer to the root node of the AVL-Tree
299 * @param dd disk device search key
300 * @param block block search key
301 * @retval NULL node with the specified dd/block is not found
302 * @return pointer to the node with specified dd/block
303 */
304static rtems_bdbuf_buffer *
305rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
306                        const rtems_disk_device *dd,
307                        rtems_blkdev_bnum    block)
308{
309  rtems_bdbuf_buffer* p = *root;
310
311  while ((p != NULL) && ((p->dd != dd) || (p->block != block)))
312  {
313    if (((uintptr_t) p->dd < (uintptr_t) dd)
314        || ((p->dd == dd) && (p->block < block)))
315    {
316      p = p->avl.right;
317    }
318    else
319    {
320      p = p->avl.left;
321    }
322  }
323
324  return p;
325}
326
/**
 * Inserts the specified node to the AVl-Tree.
 *
 * The walk down records the path in buf_stack and marks each visited node's
 * avl.cache with the direction taken (+1 right, -1 left); the walk back up
 * uses those marks to rebalance and to re-link rotated subtrees.
 *
 * @param root pointer to the root node of the AVL-Tree
 * @param node Pointer to the node to add.
 * @retval 0 The node added successfully
 * @retval -1 An error occurred (a node with the same dd/block already exists)
 */
static int
rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
                       rtems_bdbuf_buffer*  node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  /* Empty tree: the new node becomes the root. */
  if (p == NULL)
  {
    *root = node;
    node->avl.left = NULL;
    node->avl.right = NULL;
    node->avl.bal = 0;
    return 0;
  }

  /* Walk down to the insertion point, pushing the path onto buf_stack. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      p->avl.cache = 1;
      q = p->avl.right;
      if (q == NULL)
      {
        /* NOTE(review): the first assignment is redundant — the next line
         * assigns q = node again as part of linking the child. */
        q = node;
        p->avl.right = q = node;
        break;
      }
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      p->avl.cache = -1;
      q = p->avl.left;
      if (q == NULL)
      {
        q = node;
        p->avl.left = q;
        break;
      }
    }
    else
    {
      /* Duplicate key: dd/block already present. */
      return -1;
    }

    p = q;
  }

  /* Initialize the freshly linked leaf. */
  q->avl.left = q->avl.right = NULL;
  q->avl.bal = 0;
  modified = true;
  buf_prev--;

  /* Walk back up the recorded path, updating balance factors and rotating
   * where a subtree becomes too deep. Stops once a subtree height is
   * unchanged (modified == false) or the root is reached. */
  while (modified)
  {
    if (p->avl.cache == -1)
    {
      /* Insertion went into the left subtree of p. */
      switch (p->avl.bal)
      {
        case 1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = -1;
          break;

        case -1:
          p1 = p->avl.left;
          if (p1->avl.bal == -1) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;
            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;
            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    else
    {
      /* Insertion went into the right subtree of p. */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = 1;
          break;

        case 1:
          p1 = p->avl.right;
          if (p1->avl.bal == 1) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;
            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;
            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    /* Re-link the (possibly rotated) subtree into its parent, or install it
     * as the new root when the stack is exhausted. */
    q = p;
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;

      if (p->avl.cache == -1)
      {
        p->avl.left = q;
      }
      else
      {
        p->avl.right = q;
      }
    }
    else
    {
      *root = p;
      break;
    }
  };

  return 0;
}
507
508
/**
 * Removes the node from the tree.
 *
 * As in the insert path, the descent records each visited node in buf_stack
 * and tags avl.cache with the direction taken; the ascent uses the tags to
 * rebalance. When the victim has two children it is replaced by its in-order
 * successor (leftmost node of the right subtree).
 *
 * @param root Pointer to pointer to the root node
 * @param node Pointer to the node to remove
 * @retval 0 Item removed
 * @retval -1 No such item found
 */
static int
rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
                       const rtems_bdbuf_buffer* node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  r;
  rtems_bdbuf_buffer*  s;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  memset (buf_stack, 0, sizeof(buf_stack));

  /* Walk down to the node with the matching dd/block key. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      p->avl.cache = 1;
      p = p->avl.right;
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      p->avl.cache = -1;
      p = p->avl.left;
    }
    else
    {
      /* node found */
      break;
    }
  }

  if (p == NULL)
  {
    /* there is no such node */
    return -1;
  }

  q = p;

  /* Pop the victim off the path stack; p becomes its parent (or NULL when
   * the victim is the root). */
  buf_prev--;
  if (buf_prev > buf_stack)
  {
    p = *(buf_prev - 1);
  }
  else
  {
    p = NULL;
  }

  /* at this moment q - is a node to delete, p is q's parent */
  if (q->avl.right == NULL)
  {
    /* No right child: splice in the left child (if any). */
    r = q->avl.left;
    if (r != NULL)
    {
      r->avl.bal = 0;
    }
    q = r;
  }
  else
  {
    rtems_bdbuf_buffer **t;

    r = q->avl.right;

    if (r->avl.left == NULL)
    {
      /* Right child has no left subtree: it replaces the victim directly. */
      r->avl.left = q->avl.left;
      r->avl.bal = q->avl.bal;
      r->avl.cache = 1;
      *buf_prev++ = q = r;
    }
    else
    {
      /* General case: find the in-order successor s (leftmost in the right
       * subtree), recording the path for rebalancing, then put s in the
       * victim's place. t remembers the stack slot that must point at the
       * replacement. */
      t = buf_prev++;
      s = r;

      while (s->avl.left != NULL)
      {
        *buf_prev++ = r = s;
        s = r->avl.left;
        r->avl.cache = -1;
      }

      s->avl.left = q->avl.left;
      r->avl.left = s->avl.right;
      s->avl.right = q->avl.right;
      s->avl.bal = q->avl.bal;
      s->avl.cache = 1;

      *t = q = s;
    }
  }

  /* Hook the replacement subtree into the victim's parent (or the root). */
  if (p != NULL)
  {
    if (p->avl.cache == -1)
    {
      p->avl.left = q;
    }
    else
    {
      p->avl.right = q;
    }
  }
  else
  {
    *root = q;
  }

  modified = true;

  /* Walk back up the recorded path, rebalancing shortened subtrees until a
   * subtree's height is unchanged or the root has been processed. */
  while (modified)
  {
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;
    }
    else
    {
      break;
    }

    if (p->avl.cache == -1)
    {
      /* rebalance left branch */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          break;
        case  0:
          p->avl.bal = 1;
          modified = false;
          break;

        case +1:
          p1 = p->avl.right;

          if (p1->avl.bal >= 0) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;

            if (p1->avl.bal == 0)
            {
              p1->avl.bal = -1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;

            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;

            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }
    else
    {
      /* rebalance right branch */
      switch (p->avl.bal)
      {
        case +1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = -1;
          modified = false;
          break;

        case -1:
          p1 = p->avl.left;

          if (p1->avl.bal <= 0) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            if (p1->avl.bal == 0)
            {
              p1->avl.bal = 1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;

            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;

            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }

    /* Re-link the rotated subtree into its parent, or install it as the new
     * root when the stack is exhausted. */
    if (buf_prev > buf_stack)
    {
      q = *(buf_prev - 1);

      if (q->avl.cache == -1)
      {
        q->avl.left = p;
      }
      else
      {
        q->avl.right = p;
      }
    }
    else
    {
      *root = p;
      break;
    }

  }

  return 0;
}
783
/**
 * Set the state of a buffer descriptor.
 */
static void
rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
{
  bd->state = state;
}
789
790static rtems_blkdev_bnum
791rtems_bdbuf_media_block (const rtems_disk_device *dd, rtems_blkdev_bnum block)
792{
793  if (dd->block_to_media_block_shift >= 0)
794    return block << dd->block_to_media_block_shift;
795  else
796    /*
797     * Change the block number for the block size to the block number for the media
798     * block size. We have to use 64bit maths. There is no short cut here.
799     */
800    return (rtems_blkdev_bnum)
801      ((((uint64_t) block) * dd->block_size) / dd->media_block_size);
802}
803
/**
 * Lock the mutex. A single task can nest calls.
 *
 * @param lock The mutex to lock.
 */
static void
rtems_bdbuf_lock (rtems_mutex *lock)
{
  rtems_mutex_lock (lock);
}

/**
 * Unlock the mutex.
 *
 * @param lock The mutex to unlock.
 */
static void
rtems_bdbuf_unlock (rtems_mutex *lock)
{
  rtems_mutex_unlock (lock);
}

/**
 * Lock the cache. A single task can nest calls.
 */
static void
rtems_bdbuf_lock_cache (void)
{
  rtems_bdbuf_lock (&bdbuf_cache.lock);
}

/**
 * Unlock the cache.
 */
static void
rtems_bdbuf_unlock_cache (void)
{
  rtems_bdbuf_unlock (&bdbuf_cache.lock);
}

/**
 * Lock the cache's sync. A single task can nest calls.
 */
static void
rtems_bdbuf_lock_sync (void)
{
  rtems_bdbuf_lock (&bdbuf_cache.sync_lock);
}

/**
 * Unlock the cache's sync lock. Any blocked writers are woken.
 */
static void
rtems_bdbuf_unlock_sync (void)
{
  rtems_bdbuf_unlock (&bdbuf_cache.sync_lock);
}

/**
 * Increment the user count of the group the buffer belongs to.
 */
static void
rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
{
  ++bd->group->users;
}

/**
 * Decrement the user count of the group the buffer belongs to.
 */
static void
rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
{
  --bd->group->users;
}
873
/**
 * Wait until woken. A condition variable is used so a number of tasks can
 * wait and can be woken at once with a broadcast.
 *
 * The wait atomically releases the cache lock and blocks on the condition
 * variable, reacquiring the lock before returning, so no wake-up between
 * unlock and block can be lost.
 *
 * A counter is used to save the broadcast call when no one is waiting.
 *
 * The function assumes the cache is locked on entry and it will be locked on
 * exit.
 */
static void
rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
{
  /*
   * Indicate we are waiting.
   */
  ++waiters->count;

  rtems_condition_variable_wait (&waiters->cond_var, &bdbuf_cache.lock);

  --waiters->count;
}

/**
 * Wait on a waiter group on behalf of a specific buffer. The buffer's
 * waiter count and its group's user count are held across the wait so the
 * buffer and group are not recycled while we sleep.
 */
static void
rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
{
  rtems_bdbuf_group_obtain (bd);
  ++bd->waiters;
  rtems_bdbuf_anonymous_wait (waiters);
  --bd->waiters;
  rtems_bdbuf_group_release (bd);
}
911
912/**
913 * Wake a blocked resource. The resource has a counter that lets us know if
914 * there are any waiters.
915 */
916static void
917rtems_bdbuf_wake (rtems_bdbuf_waiters *waiters)
918{
919  if (waiters->count > 0)
920  {
921    rtems_condition_variable_broadcast (&waiters->cond_var);
922  }
923}
924
925static void
926rtems_bdbuf_wake_swapper (void)
927{
928  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
929                                           RTEMS_BDBUF_SWAPOUT_SYNC);
930  if (sc != RTEMS_SUCCESSFUL)
931    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_1);
932}
933
934static bool
935rtems_bdbuf_has_buffer_waiters (void)
936{
937  return bdbuf_cache.buffer_waiters.count;
938}
939
940static void
941rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
942{
943  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
944    rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_TREE_RM);
945}
946
947static void
948rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
949{
950  switch (bd->state)
951  {
952    case RTEMS_BDBUF_STATE_FREE:
953      break;
954    case RTEMS_BDBUF_STATE_CACHED:
955      rtems_bdbuf_remove_from_tree (bd);
956      break;
957    default:
958      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_10);
959  }
960
961  rtems_chain_extract_unprotected (&bd->link);
962}
963
/**
 * Mark the buffer FREE and put it at the head of the LRU list so it is the
 * first candidate for reuse.
 */
static void
rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
  rtems_chain_prepend_unprotected (&bdbuf_cache.lru, &bd->link);
}

/**
 * Mark the buffer EMPTY. The buffer stays on whatever list it is on.
 */
static void
rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
}

/**
 * Mark the buffer CACHED and append it to the tail of the LRU list, making
 * it the last candidate for reuse.
 */
static void
rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
  rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
}
983
984static void
985rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
986{
987  rtems_bdbuf_make_empty (bd);
988
989  if (bd->waiters == 0)
990  {
991    rtems_bdbuf_remove_from_tree (bd);
992    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
993  }
994}
995
/**
 * Release a buffer after user access and queue it on the modified list.
 *
 * If a sync covering this device is in progress, the cache lock is dropped
 * and the sync lock is taken and released first; this blocks the release
 * until the sync completes so the modified list is stable during the sync.
 * Note the cache state may change while the cache lock is released.
 */
static void
rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
{
  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dd)
  {
    rtems_bdbuf_unlock_cache ();

    /*
     * Wait for the sync lock.
     */
    rtems_bdbuf_lock_sync ();

    rtems_bdbuf_unlock_sync ();
    rtems_bdbuf_lock_cache ();
  }

  /*
   * Only the first modified release sets the timer and any further user
   * accesses do not change the timer value which should move down. This
   * assumes the user's hold of the buffer is much less than the time on the
   * modified list. Resetting the timer on each access which could result in a
   * buffer never getting to 0 and never being forced onto disk. This raises a
   * difficult question. Is a snapshot of a block that is changing better than
   * nothing being written? We have tended to think we should hold changes for
   * only a specific period of time even if still changing and get onto disk
   * and letting the file system try and recover this position if it can.
   */
  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
    bd->hold_timer = bdbuf_config.swap_block_hold;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
  rtems_chain_append_unprotected (&bdbuf_cache.modified, &bd->link);

  /* Wake waiters on this buffer first; otherwise nudge the swapout task if
   * tasks are starved for free buffers. */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else if (rtems_bdbuf_has_buffer_waiters ())
    rtems_bdbuf_wake_swapper ();
}
1035
1036static void
1037rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
1038{
1039  rtems_bdbuf_group_release (bd);
1040  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
1041
1042  if (bd->waiters)
1043    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1044  else
1045    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1046}
1047
1048/**
1049 * Compute the number of BDs per group for a given buffer size.
1050 *
1051 * @param size The buffer size. It can be any size and we scale up.
1052 */
1053static size_t
1054rtems_bdbuf_bds_per_group (size_t size)
1055{
1056  size_t bufs_per_size;
1057  size_t bds_per_size;
1058
1059  if (size > bdbuf_config.buffer_max)
1060    return 0;
1061
1062  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1063
1064  for (bds_per_size = 1;
1065       bds_per_size < bufs_per_size;
1066       bds_per_size <<= 1)
1067    ;
1068
1069  return bdbuf_cache.max_bds_per_group / bds_per_size;
1070}
1071
1072static void
1073rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
1074{
1075  rtems_bdbuf_group_release (bd);
1076  rtems_bdbuf_discard_buffer (bd);
1077
1078  if (bd->waiters)
1079    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1080  else
1081    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1082}
1083
1084/**
1085 * Reallocate a group. The BDs currently allocated in the group are removed
1086 * from the ALV tree and any lists then the new BD's are prepended to the ready
1087 * list of the cache.
1088 *
1089 * @param group The group to reallocate.
1090 * @param new_bds_per_group The new count of BDs per group.
1091 * @return A buffer of this group.
1092 */
1093static rtems_bdbuf_buffer *
1094rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1095{
1096  rtems_bdbuf_buffer* bd;
1097  size_t              b;
1098  size_t              bufs_per_bd;
1099
1100  if (rtems_bdbuf_tracer)
1101    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1102            group - bdbuf_cache.groups, group->bds_per_group,
1103            new_bds_per_group);
1104
1105  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1106
1107  for (b = 0, bd = group->bdbuf;
1108       b < group->bds_per_group;
1109       b++, bd += bufs_per_bd)
1110    rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1111
1112  group->bds_per_group = new_bds_per_group;
1113  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1114
1115  for (b = 1, bd = group->bdbuf + bufs_per_bd;
1116       b < group->bds_per_group;
1117       b++, bd += bufs_per_bd)
1118    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1119
1120  if (b > 1)
1121    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1122
1123  return group->bdbuf;
1124}
1125
/**
 * Bind a recycled buffer descriptor to a new dd/block key: reset its AVL
 * links and waiter count, insert it into the lookup tree, and mark it
 * EMPTY. An insert failure means the key is already in the tree, which is a
 * fatal recycle error.
 */
static void
rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
                                rtems_disk_device  *dd,
                                rtems_blkdev_bnum   block)
{
  bd->dd        = dd ;
  bd->block     = block;
  bd->avl.left  = NULL;
  bd->avl.right = NULL;
  bd->waiters   = 0;

  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RECYCLE);

  rtems_bdbuf_make_empty (bd);
}
1142
/**
 * Scan the LRU list for a buffer that can be recycled for the given
 * dd/block. A candidate must have no waiters and either already match the
 * device's BDs-per-group layout or belong to an unused group that can be
 * reallocated to that layout.
 *
 * @retval NULL No recyclable buffer was found.
 * @return The buffer, re-keyed to dd/block and in EMPTY state.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_from_lru_list (rtems_disk_device *dd,
                                      rtems_blkdev_bnum  block)
{
  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);

  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
  {
    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
    rtems_bdbuf_buffer *empty_bd = NULL;

    if (rtems_bdbuf_tracer)
      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
              bd - bdbuf_cache.bds,
              bd->group - bdbuf_cache.groups, bd->group->users,
              bd->group->bds_per_group, dd->bds_per_group);

    /*
     * If nobody waits for this BD, we may recycle it.
     */
    if (bd->waiters == 0)
    {
      if (bd->group->bds_per_group == dd->bds_per_group)
      {
        /* Layout already matches: just detach the buffer. */
        rtems_bdbuf_remove_from_tree_and_lru_list (bd);

        empty_bd = bd;
      }
      else if (bd->group->users == 0)
        /* Unused group: reshape it and take its first buffer. Note this
         * detaches the group's BDs from the LRU list we are iterating, and
         * we return below without touching the stale node pointer. */
        empty_bd = rtems_bdbuf_group_realloc (bd->group, dd->bds_per_group);
    }

    if (empty_bd != NULL)
    {
      rtems_bdbuf_setup_empty_buffer (empty_bd, dd, block);

      return empty_bd;
    }

    node = rtems_chain_next (node);
  }

  return NULL;
}
1187
1188static rtems_status_code
1189rtems_bdbuf_create_task(
1190  rtems_name name,
1191  rtems_task_priority priority,
1192  rtems_task_priority default_priority,
1193  rtems_id *id
1194)
1195{
1196  rtems_status_code sc;
1197  size_t stack_size = bdbuf_config.task_stack_size ?
1198    bdbuf_config.task_stack_size : RTEMS_BDBUF_TASK_STACK_SIZE_DEFAULT;
1199
1200  priority = priority != 0 ? priority : default_priority;
1201
1202  sc = rtems_task_create (name,
1203                          priority,
1204                          stack_size,
1205                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1206                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1207                          id);
1208
1209  return sc;
1210}
1211
1212static rtems_bdbuf_swapout_transfer*
1213rtems_bdbuf_swapout_transfer_alloc (void)
1214{
1215  /*
1216   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
1217   * I am disappointment at finding code like this in RTEMS. The request should
1218   * have been a rtems_chain_control. Simple, fast and less storage as the node
1219   * is already part of the buffer structure.
1220   */
1221  size_t transfer_size = sizeof (rtems_bdbuf_swapout_transfer)
1222    + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
1223  return calloc (1, transfer_size);
1224}
1225
1226static void
1227rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status);
1228
1229static void
1230rtems_bdbuf_swapout_transfer_init (rtems_bdbuf_swapout_transfer* transfer,
1231                                   rtems_id id)
1232{
1233  rtems_chain_initialize_empty (&transfer->bds);
1234  transfer->dd = BDBUF_INVALID_DEV;
1235  transfer->syncing = false;
1236  transfer->write_req.req = RTEMS_BLKDEV_REQ_WRITE;
1237  transfer->write_req.done = rtems_bdbuf_transfer_done;
1238  transfer->write_req.io_task = id;
1239}
1240
1241static size_t
1242rtems_bdbuf_swapout_worker_size (void)
1243{
1244  return sizeof (rtems_bdbuf_swapout_worker)
1245    + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
1246}
1247
1248static rtems_task
1249rtems_bdbuf_swapout_worker_task (rtems_task_argument arg);
1250
/**
 * Allocate and start the configured number of swapout worker tasks.  The
 * workers are carved out of one contiguous zeroed allocation; each worker is
 * appended to the free-worker chain before its task is started.
 *
 * On failure the partially created tasks and the allocation are NOT released
 * here; the caller (rtems_bdbuf_do_init) performs the cleanup on its error
 * path using bdbuf_cache.swapout_workers and the worker ids.
 *
 * @return RTEMS_SUCCESSFUL, RTEMS_NO_MEMORY, or a task create/start status.
 */
static rtems_status_code
rtems_bdbuf_swapout_workers_create (void)
{
  rtems_status_code  sc;
  size_t             w;
  size_t             worker_size;
  char              *worker_current;

  worker_size = rtems_bdbuf_swapout_worker_size ();
  worker_current = calloc (1, bdbuf_config.swapout_workers * worker_size);
  sc = worker_current != NULL ? RTEMS_SUCCESSFUL : RTEMS_NO_MEMORY;

  /* Publish the base pointer even on failure so the caller can free it. */
  bdbuf_cache.swapout_workers = (rtems_bdbuf_swapout_worker *) worker_current;

  for (w = 0;
       sc == RTEMS_SUCCESSFUL && w < bdbuf_config.swapout_workers;
       w++, worker_current += worker_size)
  {
    rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;

    /* Workers are named BDoa, BDob, ... in creation order. */
    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'D', 'o', 'a' + w),
                                  bdbuf_config.swapout_worker_priority,
                                  RTEMS_BDBUF_SWAPOUT_WORKER_TASK_PRIORITY_DEFAULT,
                                  &worker->id);
    if (sc == RTEMS_SUCCESSFUL)
    {
      rtems_bdbuf_swapout_transfer_init (&worker->transfer, worker->id);

      rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);
      worker->enabled = true;

      sc = rtems_task_start (worker->id,
                             rtems_bdbuf_swapout_worker_task,
                             (rtems_task_argument) worker);
    }
  }

  return sc;
}
1290
1291static size_t
1292rtems_bdbuf_read_request_size (uint32_t transfer_count)
1293{
1294  return sizeof (rtems_blkdev_request)
1295    + sizeof (rtems_blkdev_sg_buffer) * transfer_count;
1296}
1297
/**
 * One-time initialization of the buffer cache: validates the configuration,
 * sizes and allocates the BD, group and buffer memory, threads every BD onto
 * the LRU list, and creates the swapout task plus optional swapout workers
 * and the read-ahead task.
 *
 * On any failure the goto-error path deletes whatever tasks were created,
 * frees all allocations and returns RTEMS_UNSATISFIED.
 *
 * @return RTEMS_SUCCESSFUL on success; RTEMS_CALLED_FROM_ISR,
 *         RTEMS_INVALID_NUMBER or RTEMS_UNSATISFIED otherwise.
 */
static rtems_status_code
rtems_bdbuf_do_init (void)
{
  rtems_bdbuf_group*  group;
  rtems_bdbuf_buffer* bd;
  uint8_t*            buffer;
  size_t              b;
  rtems_status_code   sc;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:init\n");

  if (rtems_interrupt_is_in_progress())
    return RTEMS_CALLED_FROM_ISR;

  /*
   * Check the configuration table values.
   */

  /* The maximum buffer size must be a multiple of the minimum. */
  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
    return RTEMS_INVALID_NUMBER;

  /* Read requests are alloca'd on task stacks; bound their size. */
  if (rtems_bdbuf_read_request_size (bdbuf_config.max_read_ahead_blocks)
      > RTEMS_MINIMUM_STACK_SIZE / 8U)
    return RTEMS_INVALID_NUMBER;

  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;

  rtems_chain_initialize_empty (&bdbuf_cache.swapout_free_workers);
  rtems_chain_initialize_empty (&bdbuf_cache.lru);
  rtems_chain_initialize_empty (&bdbuf_cache.modified);
  rtems_chain_initialize_empty (&bdbuf_cache.sync);
  rtems_chain_initialize_empty (&bdbuf_cache.read_ahead_chain);

  rtems_mutex_set_name (&bdbuf_cache.lock, "bdbuf lock");
  rtems_mutex_set_name (&bdbuf_cache.sync_lock, "bdbuf sync lock");
  rtems_condition_variable_set_name (&bdbuf_cache.access_waiters.cond_var,
                                     "bdbuf access");
  rtems_condition_variable_set_name (&bdbuf_cache.transfer_waiters.cond_var,
                                     "bdbuf transfer");
  rtems_condition_variable_set_name (&bdbuf_cache.buffer_waiters.cond_var,
                                     "bdbuf buffer");

  rtems_bdbuf_lock_cache ();

  /*
   * Compute the various number of elements in the cache.
   */
  bdbuf_cache.buffer_min_count =
    bdbuf_config.size / bdbuf_config.buffer_min;
  bdbuf_cache.max_bds_per_group =
    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
  bdbuf_cache.group_count =
    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;

  /*
   * Allocate the memory for the buffer descriptors.
   */
  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
                            bdbuf_cache.buffer_min_count);
  if (!bdbuf_cache.bds)
    goto error;

  /*
   * Allocate the memory for the group control blocks.
   */
  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
                               bdbuf_cache.group_count);
  if (!bdbuf_cache.groups)
    goto error;

  /*
   * Allocate memory for buffer memory. The buffer memory will be cache
   * aligned. It is possible to free the memory allocated by
   * rtems_cache_aligned_malloc() with free().
   */
  bdbuf_cache.buffers = rtems_cache_aligned_malloc(bdbuf_cache.buffer_min_count
                                                   * bdbuf_config.buffer_min);
  if (bdbuf_cache.buffers == NULL)
    goto error;

  /*
   * The cache is empty after opening so we need to add all the buffers to it
   * and initialise the groups.
   */
  for (b = 0, group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
       b < bdbuf_cache.buffer_min_count;
       b++, bd++, buffer += bdbuf_config.buffer_min)
  {
    bd->dd    = BDBUF_INVALID_DEV;
    bd->group  = group;
    bd->buffer = buffer;

    rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);

    /* Advance to the next group after max_bds_per_group BDs. */
    if ((b % bdbuf_cache.max_bds_per_group) ==
        (bdbuf_cache.max_bds_per_group - 1))
      group++;
  }

  /* Each group starts at its first BD with the maximum geometry. */
  for (b = 0,
         group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds;
       b < bdbuf_cache.group_count;
       b++,
         group++,
         bd += bdbuf_cache.max_bds_per_group)
  {
    group->bds_per_group = bdbuf_cache.max_bds_per_group;
    group->bdbuf = bd;
  }

  /*
   * Create and start swapout task.
   */

  bdbuf_cache.swapout_transfer = rtems_bdbuf_swapout_transfer_alloc ();
  if (!bdbuf_cache.swapout_transfer)
    goto error;

  bdbuf_cache.swapout_enabled = true;

  sc = rtems_bdbuf_create_task (rtems_build_name('B', 'S', 'W', 'P'),
                                bdbuf_config.swapout_priority,
                                RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
                                &bdbuf_cache.swapout);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  rtems_bdbuf_swapout_transfer_init (bdbuf_cache.swapout_transfer, bdbuf_cache.swapout);

  sc = rtems_task_start (bdbuf_cache.swapout,
                         rtems_bdbuf_swapout_task,
                         (rtems_task_argument) bdbuf_cache.swapout_transfer);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  if (bdbuf_config.swapout_workers > 0)
  {
    sc = rtems_bdbuf_swapout_workers_create ();
    if (sc != RTEMS_SUCCESSFUL)
      goto error;
  }

  if (bdbuf_config.max_read_ahead_blocks > 0)
  {
    bdbuf_cache.read_ahead_enabled = true;
    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'R', 'D', 'A'),
                                  bdbuf_config.read_ahead_priority,
                                  RTEMS_BDBUF_READ_AHEAD_TASK_PRIORITY_DEFAULT,
                                  &bdbuf_cache.read_ahead_task);
    if (sc != RTEMS_SUCCESSFUL)
      goto error;

    sc = rtems_task_start (bdbuf_cache.read_ahead_task,
                           rtems_bdbuf_read_ahead_task,
                           0);
    if (sc != RTEMS_SUCCESSFUL)
      goto error;
  }

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;

error:

  /* Tear down everything created so far; ids are 0 if never created. */
  if (bdbuf_cache.read_ahead_task != 0)
    rtems_task_delete (bdbuf_cache.read_ahead_task);

  if (bdbuf_cache.swapout != 0)
    rtems_task_delete (bdbuf_cache.swapout);

  if (bdbuf_cache.swapout_workers)
  {
    char   *worker_current = (char *) bdbuf_cache.swapout_workers;
    size_t  worker_size = rtems_bdbuf_swapout_worker_size ();
    size_t  w;

    for (w = 0;
         w < bdbuf_config.swapout_workers;
         w++, worker_current += worker_size)
    {
      rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;

      if (worker->id != 0) {
        rtems_task_delete (worker->id);
      }
    }
  }

  free (bdbuf_cache.buffers);
  free (bdbuf_cache.groups);
  free (bdbuf_cache.bds);
  free (bdbuf_cache.swapout_transfer);
  free (bdbuf_cache.swapout_workers);

  rtems_bdbuf_unlock_cache ();

  return RTEMS_UNSATISFIED;
}
1500
/**
 * pthread_once() body: runs the real initialization exactly once and stores
 * its result so every rtems_bdbuf_init() caller can report the same status.
 */
static void
rtems_bdbuf_init_once (void)
{
  bdbuf_cache.init_status = rtems_bdbuf_do_init();
}
1506
1507rtems_status_code
1508rtems_bdbuf_init (void)
1509{
1510  int eno;
1511
1512  eno = pthread_once (&bdbuf_cache.once, rtems_bdbuf_init_once);
1513  _Assert (eno == 0);
1514  (void) eno;
1515
1516  return bdbuf_cache.init_status;
1517}
1518
1519static void
1520rtems_bdbuf_wait_for_event (rtems_event_set event)
1521{
1522  rtems_status_code sc = RTEMS_SUCCESSFUL;
1523  rtems_event_set   out = 0;
1524
1525  sc = rtems_event_receive (event,
1526                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1527                            RTEMS_NO_TIMEOUT,
1528                            &out);
1529
1530  if (sc != RTEMS_SUCCESSFUL || out != event)
1531    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_EVNT);
1532}
1533
1534static void
1535rtems_bdbuf_wait_for_transient_event (void)
1536{
1537  rtems_status_code sc = RTEMS_SUCCESSFUL;
1538
1539  sc = rtems_event_transient_receive (RTEMS_WAIT, RTEMS_NO_TIMEOUT);
1540  if (sc != RTEMS_SUCCESSFUL)
1541    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT);
1542}
1543
/**
 * Wait until the BD may be accessed by the caller.  Loops on the BD state:
 * buffers being accessed by another user or in transfer are waited on; a
 * modified/cached/empty buffer is claimed immediately.  Called with the
 * cache locked; the waits release and re-acquire the lock.
 */
static void
rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* Leaving the modified list drops the group reference it held. */
        rtems_bdbuf_group_release (bd);
        /* Fall through */
      case RTEMS_BDBUF_STATE_CACHED:
        /* Take the BD off the LRU or modified list; it is now in use. */
        rtems_chain_extract_unprotected (&bd->link);
        /* Fall through */
      case RTEMS_BDBUF_STATE_EMPTY:
        return;
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        /* Another user holds the BD; wait for it to be released. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        /* I/O in progress; wait for the transfer to finish. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_7);
    }
  }
}
1575
/**
 * Move a modified BD onto the sync list and wake the swapout task so it is
 * written back promptly (used when a recycler needs the buffer).
 */
static void
rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
  /* Off the modified list, onto the sync list. */
  rtems_chain_extract_unprotected (&bd->link);
  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
  rtems_bdbuf_wake_swapper ();
}
1584
/**
 * @brief Waits until the buffer is ready for recycling.
 *
 * Loops on the BD state: modified buffers are pushed to the sync list,
 * in-use or in-transfer buffers are waited on, and a free/cached/empty
 * buffer with no waiters may be recycled.  Called with the cache locked.
 *
 * @retval @c true Buffer is valid and may be recycled.
 * @retval @c false Buffer is invalid and has to searched again.
 */
static bool
rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_FREE:
        return true;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* Ask the swapper to write it back, then re-check the state. */
        rtems_bdbuf_request_sync_for_modified_buffer (bd);
        break;
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
        if (bd->waiters == 0)
          return true;
        else
        {
          /*
           * It is essential that we wait here without a special wait count and
           * without the group in use.  Otherwise we could trigger a wait ping
           * pong with another recycle waiter.  The state of the buffer is
           * arbitrary afterwards.
           */
          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
          return false;
        }
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        /* Another user holds the BD; wait for the access to end. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        /* I/O in progress; wait for the transfer to finish. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_8);
    }
  }
}
1634
/**
 * Wait until any sync or transfer on the BD has completed.  Returns as soon
 * as the BD is in a settled (cached/empty/modified/access) state.  Called
 * with the cache locked; the waits release and re-acquire the lock.
 */
static void
rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        return;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_9);
    }
  }
}
1660
/**
 * Block until a buffer becomes available.  First kicks the swapper if there
 * are modified buffers, so write-back can free some, then waits anonymously
 * on the buffer waiters.
 */
static void
rtems_bdbuf_wait_for_buffer (void)
{
  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
    rtems_bdbuf_wake_swapper ();

  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
}
1669
/**
 * Queue a just-accessed BD for synchronous write-back and wait for the sync
 * to finish.  Afterwards, if the sync left a cached or empty buffer with no
 * waiters, it is made recyclable and buffer waiters are woken.
 */
static void
rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);

  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);

  /* Let access waiters observe the state change. */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_wait_for_sync_done (bd);

  /*
   * We may have created a cached or empty buffer which may be recycled.
   */
  if (bd->waiters == 0
        && (bd->state == RTEMS_BDBUF_STATE_CACHED
          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
  {
    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
    {
      /* An empty buffer has no valid data; free it for reuse. */
      rtems_bdbuf_remove_from_tree (bd);
      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
    }
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
  }
}
1698
1699static rtems_bdbuf_buffer *
1700rtems_bdbuf_get_buffer_for_read_ahead (rtems_disk_device *dd,
1701                                       rtems_blkdev_bnum  block)
1702{
1703  rtems_bdbuf_buffer *bd = NULL;
1704
1705  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1706
1707  if (bd == NULL)
1708  {
1709    bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);
1710
1711    if (bd != NULL)
1712      rtems_bdbuf_group_obtain (bd);
1713  }
1714  else
1715    /*
1716     * The buffer is in the cache.  So it is already available or in use, and
1717     * thus no need for a read ahead.
1718     */
1719    bd = NULL;
1720
1721  return bd;
1722}
1723
1724static rtems_bdbuf_buffer *
1725rtems_bdbuf_get_buffer_for_access (rtems_disk_device *dd,
1726                                   rtems_blkdev_bnum  block)
1727{
1728  rtems_bdbuf_buffer *bd = NULL;
1729
1730  do
1731  {
1732    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1733
1734    if (bd != NULL)
1735    {
1736      if (bd->group->bds_per_group != dd->bds_per_group)
1737      {
1738        if (rtems_bdbuf_wait_for_recycle (bd))
1739        {
1740          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1741          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1742          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1743        }
1744        bd = NULL;
1745      }
1746    }
1747    else
1748    {
1749      bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);
1750
1751      if (bd == NULL)
1752        rtems_bdbuf_wait_for_buffer ();
1753    }
1754  }
1755  while (bd == NULL);
1756
1757  rtems_bdbuf_wait_for_access (bd);
1758  rtems_bdbuf_group_obtain (bd);
1759
1760  return bd;
1761}
1762
1763static rtems_status_code
1764rtems_bdbuf_get_media_block (const rtems_disk_device *dd,
1765                             rtems_blkdev_bnum        block,
1766                             rtems_blkdev_bnum       *media_block_ptr)
1767{
1768  rtems_status_code sc = RTEMS_SUCCESSFUL;
1769
1770  if (block < dd->block_count)
1771  {
1772    /*
1773     * Compute the media block number. Drivers work with media block number not
1774     * the block number a BD may have as this depends on the block size set by
1775     * the user.
1776     */
1777    *media_block_ptr = rtems_bdbuf_media_block (dd, block) + dd->start;
1778  }
1779  else
1780  {
1781    sc = RTEMS_INVALID_ID;
1782  }
1783
1784  return sc;
1785}
1786
/**
 * Get a buffer for the block without reading its contents from the device.
 * The returned BD is in an access state owned by the caller; *bd_ptr is NULL
 * when the block number is invalid.
 */
rtems_status_code
rtems_bdbuf_get (rtems_disk_device   *dd,
                 rtems_blkdev_bnum    block,
                 rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code   sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block;

  rtems_bdbuf_lock_cache ();

  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    /*
     * Print the block index relative to the physical disk.
     */
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);

    /* Transition the settled state to the matching access state. */
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /*
         * To get a modified buffer could be considered a bug in the caller
         * because you should not be getting an already modified buffer but
         * user may have modified a byte in a block then decided to seek the
         * start and write the whole block and the file system will have no
         * record of this so just gets the block to fill.
         */
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_2);
        break;
    }

    if (rtems_bdbuf_tracer)
    {
      rtems_bdbuf_show_users ("get", bd);
      rtems_bdbuf_show_usage ();
    }
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
1846
/**
 * Call back handler called by the low level driver when the transfer has
 * completed. This function may be invoked from interrupt handler.
 *
 * Records the completion status in the request and wakes the task waiting
 * in rtems_bdbuf_wait_for_transient_event().
 *
 * @param req Pointer to the completed block device request.
 * @param status I/O completion status
 */
static void
rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status)
{
  req->status = status;

  rtems_event_transient_send (req->io_task);
}
1863
/**
 * Execute a prepared transfer request on the device and settle every BD it
 * carried: successful transfers become cached, failed ones are discarded.
 * Updates the device read/write statistics and wakes transfer/buffer
 * waiters as appropriate.
 *
 * The driver ioctl runs with the cache UNLOCKED; on return the lock state
 * matches @a cache_locked (the lock is handed back to the caller's
 * expectation).
 *
 * @param dd The disk device.
 * @param req The request to execute; req->bufs[i].user holds each BD.
 * @param cache_locked True if the caller holds the cache lock.
 * @return RTEMS_SUCCESSFUL, RTEMS_UNSATISFIED (passed through), or
 *         RTEMS_IO_ERROR for any other driver failure.
 */
static rtems_status_code
rtems_bdbuf_execute_transfer_request (rtems_disk_device    *dd,
                                      rtems_blkdev_request *req,
                                      bool                  cache_locked)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  uint32_t transfer_index = 0;
  bool wake_transfer_waiters = false;
  bool wake_buffer_waiters = false;

  if (cache_locked)
    rtems_bdbuf_unlock_cache ();

  /* The return value will be ignored for transfer requests */
  dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);

  /* Wait for transfer request completion */
  rtems_bdbuf_wait_for_transient_event ();
  sc = req->status;

  rtems_bdbuf_lock_cache ();

  /* Statistics */
  if (req->req == RTEMS_BLKDEV_REQ_READ)
  {
    dd->stats.read_blocks += req->bufnum;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.read_errors;
  }
  else
  {
    dd->stats.write_blocks += req->bufnum;
    ++dd->stats.write_transfers;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.write_errors;
  }

  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
  {
    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
    bool waiters = bd->waiters;

    if (waiters)
      wake_transfer_waiters = true;
    else
      wake_buffer_waiters = true;

    rtems_bdbuf_group_release (bd);

    /* A BD purged during the transfer is no longer valid and is dropped. */
    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
    else
      rtems_bdbuf_discard_buffer (bd);

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("transfer", bd);
  }

  if (wake_transfer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);

  if (wake_buffer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  if (!cache_locked)
    rtems_bdbuf_unlock_cache ();

  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
    return sc;
  else
    return RTEMS_IO_ERROR;
}
1936
/**
 * Read the block held by @a bd and up to transfer_count - 1 following
 * blocks (read ahead) in one driver request.  The request is allocated on
 * the task stack; its size is bounded at init time against
 * RTEMS_MINIMUM_STACK_SIZE (see rtems_bdbuf_do_init).  Additional buffers
 * are only added while contiguous BDs can be recycled from the LRU list.
 *
 * @param dd The disk device.
 * @param bd The BD for the first block; already owned by the caller.
 * @param transfer_count Maximum number of blocks to read.
 * @return Status from rtems_bdbuf_execute_transfer_request().
 */
static rtems_status_code
rtems_bdbuf_execute_read_request (rtems_disk_device  *dd,
                                  rtems_bdbuf_buffer *bd,
                                  uint32_t            transfer_count)
{
  rtems_blkdev_request *req = NULL;
  rtems_blkdev_bnum media_block = bd->block;
  uint32_t media_blocks_per_block = dd->media_blocks_per_block;
  uint32_t block_size = dd->block_size;
  uint32_t transfer_index = 1;

  /*
   * TODO: This type of request structure is wrong and should be removed.
   */
#define bdbuf_alloc(size) __builtin_alloca (size)

  req = bdbuf_alloc (rtems_bdbuf_read_request_size (transfer_count));

  req->req = RTEMS_BLKDEV_REQ_READ;
  req->done = rtems_bdbuf_transfer_done;
  req->io_task = rtems_task_self ();
  req->bufnum = 0;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

  req->bufs [0].user   = bd;
  req->bufs [0].block  = media_block;
  req->bufs [0].length = block_size;
  req->bufs [0].buffer = bd->buffer;

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_users ("read", bd);

  /* Append read-ahead blocks while buffers can be obtained. */
  while (transfer_index < transfer_count)
  {
    media_block += media_blocks_per_block;

    bd = rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

    /* Stop at the first block that is cached or has no free buffer. */
    if (bd == NULL)
      break;

    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

    req->bufs [transfer_index].user   = bd;
    req->bufs [transfer_index].block  = media_block;
    req->bufs [transfer_index].length = block_size;
    req->bufs [transfer_index].buffer = bd->buffer;

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("read", bd);

    ++transfer_index;
  }

  req->bufnum = transfer_index;

  return rtems_bdbuf_execute_transfer_request (dd, req, true);
}
1996
/**
 * True if the disk is currently queued on the read-ahead chain (its node is
 * not marked off-chain).
 */
static bool
rtems_bdbuf_is_read_ahead_active (const rtems_disk_device *dd)
{
  return !rtems_chain_is_node_off_chain (&dd->read_ahead.node);
}
2002
2003static void
2004rtems_bdbuf_read_ahead_cancel (rtems_disk_device *dd)
2005{
2006  if (rtems_bdbuf_is_read_ahead_active (dd))
2007  {
2008    rtems_chain_extract_unprotected (&dd->read_ahead.node);
2009    rtems_chain_set_off_chain (&dd->read_ahead.node);
2010  }
2011}
2012
/**
 * Cancel any pending read ahead for the disk and disarm its trigger.
 */
static void
rtems_bdbuf_read_ahead_reset (rtems_disk_device *dd)
{
  rtems_bdbuf_read_ahead_cancel (dd);
  dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
}
2019
2020static void
2021rtems_bdbuf_read_ahead_add_to_chain (rtems_disk_device *dd)
2022{
2023  rtems_status_code sc;
2024  rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;
2025
2026  if (rtems_chain_is_empty (chain))
2027  {
2028    sc = rtems_event_send (bdbuf_cache.read_ahead_task,
2029                           RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
2030    if (sc != RTEMS_SUCCESSFUL)
2031      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RA_WAKE_UP);
2032  }
2033
2034  rtems_chain_append_unprotected (chain, &dd->read_ahead.node);
2035}
2036
2037static void
2038rtems_bdbuf_check_read_ahead_trigger (rtems_disk_device *dd,
2039                                      rtems_blkdev_bnum  block)
2040{
2041  if (bdbuf_cache.read_ahead_task != 0
2042      && dd->read_ahead.trigger == block
2043      && !rtems_bdbuf_is_read_ahead_active (dd))
2044  {
2045    dd->read_ahead.nr_blocks = RTEMS_DISK_READ_AHEAD_SIZE_AUTO;
2046    rtems_bdbuf_read_ahead_add_to_chain(dd);
2047  }
2048}
2049
2050static void
2051rtems_bdbuf_set_read_ahead_trigger (rtems_disk_device *dd,
2052                                    rtems_blkdev_bnum  block)
2053{
2054  if (dd->read_ahead.trigger != block)
2055  {
2056    rtems_bdbuf_read_ahead_cancel (dd);
2057    dd->read_ahead.trigger = block + 1;
2058    dd->read_ahead.next = block + 2;
2059  }
2060}
2061
/**
 * Get a buffer for the block with valid data, reading from the device on a
 * cache miss.  A sequential access pattern arms/fires the read-ahead
 * trigger.  On success the BD is in an access state owned by the caller;
 * *bd_ptr is NULL on a failed read or invalid block number.
 */
rtems_status_code
rtems_bdbuf_read (rtems_disk_device   *dd,
                  rtems_blkdev_bnum    block,
                  rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code     sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer   *bd = NULL;
  rtems_blkdev_bnum     media_block;

  rtems_bdbuf_lock_cache ();

  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* Modified data is newer than the media; serve it as-is. */
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        /* Miss: arm read ahead and fetch the block from the device. */
        ++dd->stats.read_misses;
        rtems_bdbuf_set_read_ahead_trigger (dd, block);
        sc = rtems_bdbuf_execute_read_request (dd, bd, 1);
        if (sc == RTEMS_SUCCESSFUL)
        {
          rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
          /* The transfer put the BD on the LRU list; reclaim it. */
          rtems_chain_extract_unprotected (&bd->link);
          rtems_bdbuf_group_obtain (bd);
        }
        else
        {
          /* The read failed; the BD was discarded by the transfer. */
          bd = NULL;
        }
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_4);
        break;
    }

    rtems_bdbuf_check_read_ahead_trigger (dd, block);
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
2120
2121void
2122rtems_bdbuf_peek (rtems_disk_device *dd,
2123                  rtems_blkdev_bnum block,
2124                  uint32_t nr_blocks)
2125{
2126  rtems_bdbuf_lock_cache ();
2127
2128  if (bdbuf_cache.read_ahead_enabled && nr_blocks > 0)
2129  {
2130    rtems_bdbuf_read_ahead_reset(dd);
2131    dd->read_ahead.next = block;
2132    dd->read_ahead.nr_blocks = nr_blocks;
2133    rtems_bdbuf_read_ahead_add_to_chain(dd);
2134  }
2135
2136  rtems_bdbuf_unlock_cache ();
2137}
2138
/**
 * Common entry check for the buffer release/sync paths: validate the buffer
 * reference, emit optional trace output and lock the cache.
 *
 * @param bd The buffer to check. May be NULL.
 * @param kind Short tag naming the calling operation, used for tracing only.
 * @retval RTEMS_INVALID_ADDRESS The buffer reference was NULL; the cache is
 *         NOT locked in this case.
 * @retval RTEMS_SUCCESSFUL The buffer is valid and the cache lock is held by
 *         the caller on return.
 */
static rtems_status_code
rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
{
  if (bd == NULL)
    return RTEMS_INVALID_ADDRESS;
  if (rtems_bdbuf_tracer)
  {
    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
    rtems_bdbuf_show_users (kind, bd);
  }
  rtems_bdbuf_lock_cache();

  return RTEMS_SUCCESSFUL;
}
2153
2154rtems_status_code
2155rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2156{
2157  rtems_status_code sc = RTEMS_SUCCESSFUL;
2158
2159  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2160  if (sc != RTEMS_SUCCESSFUL)
2161    return sc;
2162
2163  switch (bd->state)
2164  {
2165    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2166      rtems_bdbuf_add_to_lru_list_after_access (bd);
2167      break;
2168    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2169    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2170      rtems_bdbuf_discard_buffer_after_access (bd);
2171      break;
2172    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2173      rtems_bdbuf_add_to_modified_list_after_access (bd);
2174      break;
2175    default:
2176      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_0);
2177      break;
2178  }
2179
2180  if (rtems_bdbuf_tracer)
2181    rtems_bdbuf_show_usage ();
2182
2183  rtems_bdbuf_unlock_cache ();
2184
2185  return RTEMS_SUCCESSFUL;
2186}
2187
2188rtems_status_code
2189rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2190{
2191  rtems_status_code sc = RTEMS_SUCCESSFUL;
2192
2193  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2194  if (sc != RTEMS_SUCCESSFUL)
2195    return sc;
2196
2197  switch (bd->state)
2198  {
2199    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2200    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2201    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2202      rtems_bdbuf_add_to_modified_list_after_access (bd);
2203      break;
2204    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2205      rtems_bdbuf_discard_buffer_after_access (bd);
2206      break;
2207    default:
2208      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_6);
2209      break;
2210  }
2211
2212  if (rtems_bdbuf_tracer)
2213    rtems_bdbuf_show_usage ();
2214
2215  rtems_bdbuf_unlock_cache ();
2216
2217  return RTEMS_SUCCESSFUL;
2218}
2219
2220rtems_status_code
2221rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
2222{
2223  rtems_status_code sc = RTEMS_SUCCESSFUL;
2224
2225  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2226  if (sc != RTEMS_SUCCESSFUL)
2227    return sc;
2228
2229  switch (bd->state)
2230  {
2231    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2232    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2233    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2234      rtems_bdbuf_sync_after_access (bd);
2235      break;
2236    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2237      rtems_bdbuf_discard_buffer_after_access (bd);
2238      break;
2239    default:
2240      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_5);
2241      break;
2242  }
2243
2244  if (rtems_bdbuf_tracer)
2245    rtems_bdbuf_show_usage ();
2246
2247  rtems_bdbuf_unlock_cache ();
2248
2249  return RTEMS_SUCCESSFUL;
2250}
2251
/**
 * Synchronize all modified buffers of a device to the media. Blocks until
 * the swapout task reports completion.
 */
rtems_status_code
rtems_bdbuf_syncdev (rtems_disk_device *dd)
{
  if (rtems_bdbuf_tracer)
    printf ("bdbuf:syncdev: %08x\n", (unsigned) dd->dev);

  /*
   * Take the sync lock before locking the cache. Once we have the sync lock we
   * can lock the cache. If another thread has the sync lock it will cause this
   * thread to block until it owns the sync lock then it can own the cache. The
   * sync lock can only be obtained with the cache unlocked.
   */
  rtems_bdbuf_lock_sync ();
  rtems_bdbuf_lock_cache ();

  /*
   * Set the cache to have a sync active for a specific device and let the swap
   * out task know the id of the requester to wake when done.
   *
   * The swap out task will negate the sync active flag when no more buffers
   * for the device are held on the "modified for sync" queues.
   */
  bdbuf_cache.sync_active    = true;
  bdbuf_cache.sync_requester = rtems_task_self ();
  bdbuf_cache.sync_device    = dd;

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_unlock_cache ();
  /* Block until the swapout task sends the transient event for us. */
  rtems_bdbuf_wait_for_transient_event ();
  rtems_bdbuf_unlock_sync ();

  return RTEMS_SUCCESSFUL;
}
2285
/**
 * Swapout transfer to the driver. The driver will break this I/O into groups
 * of consecutive write requests if multiple consecutive buffers are required
 * by the driver. The cache is not locked.
 *
 * @param transfer The transfer transaction.
 */
static void
rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
{
  rtems_chain_node *node;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dd->dev);

  /*
   * If there are buffers to transfer to the media transfer them.
   */
  if (!rtems_chain_is_empty (&transfer->bds))
  {
    /*
     * The last block number used when the driver only supports
     * continuous blocks in a single request.
     */
    uint32_t last_block = 0;

    rtems_disk_device *dd = transfer->dd;
    uint32_t media_blocks_per_block = dd->media_blocks_per_block;
    bool need_continuous_blocks =
      (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) != 0;

    /*
     * Take as many buffers as configured and pass to the driver. Note, the
     * API to the drivers has an array of buffers and if a chain was passed
     * we could have just passed the list. If the driver API is updated it
     * should be possible to make this change with little effect in this
     * code. The array that is passed is broken in design and should be
     * removed. Merging members of a struct into the first member is
     * trouble waiting to happen.
     */
    transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
    transfer->write_req.bufnum = 0;

    while ((node = rtems_chain_get_unprotected(&transfer->bds)) != NULL)
    {
      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
      bool                write = false;

      /*
       * If the device only accepts sequential buffers and this is not the
       * first buffer (the first is always sequential), and the buffer is not
       * sequential then put the buffer back on the transfer chain and write
       * the committed buffers.
       */

      if (rtems_bdbuf_tracer)
        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
                bd->block, transfer->write_req.bufnum,
                need_continuous_blocks ? "MULTI" : "SCAT");

      if (need_continuous_blocks && transfer->write_req.bufnum &&
          bd->block != last_block + media_blocks_per_block)
      {
        rtems_chain_prepend_unprotected (&transfer->bds, &bd->link);
        write = true;
      }
      else
      {
        /* Append this buffer to the scatter/gather list of the request. */
        rtems_blkdev_sg_buffer* buf;
        buf = &transfer->write_req.bufs[transfer->write_req.bufnum];
        transfer->write_req.bufnum++;
        buf->user   = bd;
        buf->block  = bd->block;
        buf->length = dd->block_size;
        buf->buffer = bd->buffer;
        last_block  = bd->block;
      }

      /*
       * Perform the transfer if there are no more buffers, or the transfer
       * size has reached the configured max. value.
       */

      if (rtems_chain_is_empty (&transfer->bds) ||
          (transfer->write_req.bufnum >= bdbuf_config.max_write_blocks))
        write = true;

      if (write)
      {
        rtems_bdbuf_execute_transfer_request (dd, &transfer->write_req, false);

        /* Reset the request for the next batch of buffers. */
        transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
        transfer->write_req.bufnum = 0;
      }
    }

    /*
     * If sync'ing and the device is capable of handling a sync IO control
     * call perform the call.
     */
    if (transfer->syncing &&
        (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
    {
      /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
      /* How should the error be handled ? */
    }
  }
}
2394
/**
 * Process the modified list of buffers. There is a sync or modified list that
 * needs to be handled so we have a common function to do the work.
 *
 * @param dd_ptr Pointer to the device to handle. If BDBUF_INVALID_DEV no
 * device is selected so select the device of the first buffer to be written to
 * disk.
 * @param chain The modified chain to process.
 * @param transfer The chain to which the buffers to be written are appended.
 * @param sync_active If true this is a sync operation so expire all timers.
 * @param update_timers If true update the timers.
 * @param timer_delta If update_timers is true update the timers by this
 *                    amount.
 */
static void
rtems_bdbuf_swapout_modified_processing (rtems_disk_device  **dd_ptr,
                                         rtems_chain_control* chain,
                                         rtems_chain_control* transfer,
                                         bool                 sync_active,
                                         bool                 update_timers,
                                         uint32_t             timer_delta)
{
  if (!rtems_chain_is_empty (chain))
  {
    rtems_chain_node* node = rtems_chain_head (chain);
    bool              sync_all;

    node = node->next;

    /*
     * A sync active with no valid dev means sync all.
     */
    if (sync_active && (*dd_ptr == BDBUF_INVALID_DEV))
      sync_all = true;
    else
      sync_all = false;

    while (!rtems_chain_is_tail (chain, node))
    {
      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;

      /*
       * Check if the buffer's hold timer has reached 0. If a sync is active
       * or someone waits for a buffer written force all the timers to 0.
       *
       * @note Lots of sync requests will skew this timer. It should be based
       *       on TOD to be accurate. Does it matter ?
       */
      if (sync_all || (sync_active && (*dd_ptr == bd->dd))
          || rtems_bdbuf_has_buffer_waiters ())
        bd->hold_timer = 0;

      if (bd->hold_timer)
      {
        if (update_timers)
        {
          if (bd->hold_timer > timer_delta)
            bd->hold_timer -= timer_delta;
          else
            bd->hold_timer = 0;
        }

        if (bd->hold_timer)
        {
          /* The hold timer has not expired; leave the buffer on the list. */
          node = node->next;
          continue;
        }
      }

      /*
       * This assumes we can set it to BDBUF_INVALID_DEV which is just an
       * assumption. Cannot use the transfer list being empty the sync dev
       * calls sets the dev to use.
       */
      if (*dd_ptr == BDBUF_INVALID_DEV)
        *dd_ptr = bd->dd;

      if (bd->dd == *dd_ptr)
      {
        rtems_chain_node* next_node = node->next;
        rtems_chain_node* tnode = rtems_chain_tail (transfer);

        /*
         * The blocks on the transfer list are sorted in block order. This
         * means multi-block transfers for drivers that require consecutive
         * blocks perform better with sorted blocks and for real disks it may
         * help lower head movement.
         */

        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

        rtems_chain_extract_unprotected (node);

        tnode = tnode->previous;

        /*
         * Walk the transfer list backwards from the tail to find the
         * insertion point that keeps the list sorted by block number.
         */
        while (node && !rtems_chain_is_head (transfer, tnode))
        {
          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;

          if (bd->block > tbd->block)
          {
            rtems_chain_insert_unprotected (tnode, node);
            node = NULL;
          }
          else
            tnode = tnode->previous;
        }

        /* Smaller than every block on the list: insert at the front. */
        if (node)
          rtems_chain_prepend_unprotected (transfer, node);

        node = next_node;
      }
      else
      {
        node = node->next;
      }
    }
  }
}
2515
/**
 * Process the cache's modified buffers. Check the sync list first then the
 * modified list extracting the buffers suitable to be written to disk. We have
 * a device at a time. The task level loop will repeat this operation while
 * there are buffers to be written. If the transfer fails place the buffers
 * back on the modified list and try again later. The cache is unlocked while
 * the buffers are being written to disk.
 *
 * @param timer_delta If update_timers is true update the timers by this
 *                    amount.
 * @param update_timers If true update the timers.
 * @param transfer The transfer transaction data.
 *
 * @retval true Buffers were written to disk so scan again.
 * @retval false No buffers were written to disk.
 */
static bool
rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
                                bool                          update_timers,
                                rtems_bdbuf_swapout_transfer* transfer)
{
  rtems_bdbuf_swapout_worker* worker;
  bool                        transfered_buffers = false;
  bool                        sync_active;

  rtems_bdbuf_lock_cache ();

  /*
   * To set this to true you need the cache and the sync lock.
   */
  sync_active = bdbuf_cache.sync_active;

  /*
   * If a sync is active do not use a worker because the current code does not
   * cleaning up after. We need to know the buffers have been written when
   * syncing to release sync lock and currently worker threads do not return to
   * here. We do not know the worker is the last in a sequence of sync writes
   * until after we have it running so we do not know to tell it to release the
   * lock. The simplest solution is to get the main swap out task perform all
   * sync operations.
   */
  if (sync_active)
    worker = NULL;
  else
  {
    worker = (rtems_bdbuf_swapout_worker*)
      rtems_chain_get_unprotected (&bdbuf_cache.swapout_free_workers);
    if (worker)
      transfer = &worker->transfer;
  }

  rtems_chain_initialize_empty (&transfer->bds);
  transfer->dd = BDBUF_INVALID_DEV;
  transfer->syncing = sync_active;

  /*
   * When the sync is for a device limit the sync to that device. If the sync
   * is for a buffer handle process the devices in the order on the sync
   * list. This means the dev is BDBUF_INVALID_DEV.
   */
  if (sync_active)
    transfer->dd = bdbuf_cache.sync_device;

  /*
   * If we have any buffers in the sync queue move them to the modified
   * list. The first sync buffer will select the device we use.
   */
  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
                                           &bdbuf_cache.sync,
                                           &transfer->bds,
                                           true, false,
                                           timer_delta);

  /*
   * Process the cache's modified list.
   */
  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
                                           &bdbuf_cache.modified,
                                           &transfer->bds,
                                           sync_active,
                                           update_timers,
                                           timer_delta);

  /*
   * We have all the buffers that have been modified for this device so the
   * cache can be unlocked because the state of each buffer has been set to
   * TRANSFER.
   */
  rtems_bdbuf_unlock_cache ();

  /*
   * If there are buffers to transfer to the media transfer them.
   */
  if (!rtems_chain_is_empty (&transfer->bds))
  {
    if (worker)
    {
      /* Hand the prepared transfer to the worker thread. */
      rtems_status_code sc = rtems_event_send (worker->id,
                                               RTEMS_BDBUF_SWAPOUT_SYNC);
      if (sc != RTEMS_SUCCESSFUL)
        rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_2);
    }
    else
    {
      rtems_bdbuf_swapout_write (transfer);
    }

    transfered_buffers = true;
  }

  /*
   * A sync is complete once nothing more was written; wake the requester.
   */
  if (sync_active && !transfered_buffers)
  {
    rtems_id sync_requester;
    rtems_bdbuf_lock_cache ();
    sync_requester = bdbuf_cache.sync_requester;
    bdbuf_cache.sync_active = false;
    bdbuf_cache.sync_requester = 0;
    rtems_bdbuf_unlock_cache ();
    if (sync_requester)
      rtems_event_transient_send (sync_requester);
  }

  return transfered_buffers;
}
2640
/**
 * The swapout worker thread body.
 *
 * @param arg A pointer to the worker thread's private data.
 * @return rtems_task Not used.
 */
static rtems_task
rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
{
  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;

  while (worker->enabled)
  {
    /* Wait for the swapout task to hand us a prepared transfer. */
    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);

    rtems_bdbuf_swapout_write (&worker->transfer);

    rtems_bdbuf_lock_cache ();

    /* Reset the transfer and return this worker to the free list. */
    rtems_chain_initialize_empty (&worker->transfer.bds);
    worker->transfer.dd = BDBUF_INVALID_DEV;

    rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);

    rtems_bdbuf_unlock_cache ();
  }

  free (worker);

  rtems_task_exit();
}
2672
2673/**
2674 * Close the swapout worker threads.
2675 */
2676static void
2677rtems_bdbuf_swapout_workers_close (void)
2678{
2679  rtems_chain_node* node;
2680
2681  rtems_bdbuf_lock_cache ();
2682
2683  node = rtems_chain_first (&bdbuf_cache.swapout_free_workers);
2684  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_free_workers, node))
2685  {
2686    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2687    worker->enabled = false;
2688    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2689    node = rtems_chain_next (node);
2690  }
2691
2692  rtems_bdbuf_unlock_cache ();
2693}
2694
/**
 * Body of task which takes care on flushing modified buffers to the disk.
 *
 * @param arg A pointer to the global cache data. Use the global variable and
 *            not this.
 * @return rtems_task Not used.
 */
static rtems_task
rtems_bdbuf_swapout_task (rtems_task_argument arg)
{
  rtems_bdbuf_swapout_transfer* transfer = (rtems_bdbuf_swapout_transfer *) arg;
  uint32_t                      period_in_ticks;
  const uint32_t                period_in_msecs = bdbuf_config.swapout_period;
  uint32_t                      timer_delta;

  /*
   * Localise the period.
   */
  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);

  /*
   * This is temporary. Needs to be changed to use the real time clock.
   */
  timer_delta = period_in_msecs;

  while (bdbuf_cache.swapout_enabled)
  {
    rtems_event_set   out;
    rtems_status_code sc;

    /*
     * Only update the timers once in the processing cycle.
     */
    bool update_timers = true;

    /*
     * If we write buffers to any disk perform a check again. We only write a
     * single device at a time and the cache may have more than one device's
     * buffers modified waiting to be written.
     */
    bool transfered_buffers;

    do
    {
      transfered_buffers = false;

      /*
       * Extract all the buffers we find for a specific device. The device is
       * the first one we find on a modified list. Process the sync queue of
       * buffers first.
       */
      if (rtems_bdbuf_swapout_processing (timer_delta,
                                          update_timers,
                                          transfer))
      {
        transfered_buffers = true;
      }

      /*
       * Only update the timers once.
       */
      update_timers = false;
    }
    while (transfered_buffers);

    /*
     * Sleep until the next period or until woken by a sync request. A
     * timeout is the normal periodic wake up.
     */
    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
                              RTEMS_EVENT_ALL | RTEMS_WAIT,
                              period_in_ticks,
                              &out);

    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SWAPOUT_RE);
  }

  rtems_bdbuf_swapout_workers_close ();

  free (transfer);

  rtems_task_exit();
}
2775
2776static void
2777rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2778{
2779  bool wake_buffer_waiters = false;
2780  rtems_chain_node *node = NULL;
2781
2782  while ((node = rtems_chain_get_unprotected (purge_list)) != NULL)
2783  {
2784    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
2785
2786    if (bd->waiters == 0)
2787      wake_buffer_waiters = true;
2788
2789    rtems_bdbuf_discard_buffer (bd);
2790  }
2791
2792  if (wake_buffer_waiters)
2793    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2794}
2795
/**
 * Collect all buffers belonging to a device for purging. Walks the whole
 * buffer AVL tree iteratively using an explicit parent stack (no recursion).
 * Buffers that can be removed immediately are moved to the purge list;
 * buffers currently in transfer or held by a user are only marked with a
 * purged state so the owning code path discards them later.
 *
 * @param purge_list The chain to collect immediately purgeable buffers on.
 * @param dd The device whose buffers are gathered.
 */
static void
rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
                              const rtems_disk_device *dd)
{
  /* Explicit stack of parent nodes for the iterative tree walk. */
  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer **prev = stack;
  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;

  *prev = NULL;

  while (cur != NULL)
  {
    if (cur->dd == dd)
    {
      switch (cur->state)
      {
        case RTEMS_BDBUF_STATE_FREE:
        case RTEMS_BDBUF_STATE_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
          /* Already free or already marked for purge: nothing to do. */
          break;
        case RTEMS_BDBUF_STATE_SYNC:
          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
          /* Fall through */
        case RTEMS_BDBUF_STATE_MODIFIED:
          rtems_bdbuf_group_release (cur);
          /* Fall through */
        case RTEMS_BDBUF_STATE_CACHED:
          /* Not in use: move straight onto the purge list. */
          rtems_chain_extract_unprotected (&cur->link);
          rtems_chain_append_unprotected (purge_list, &cur->link);
          break;
        case RTEMS_BDBUF_STATE_TRANSFER:
          /* In flight: mark so the transfer completion discards it. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
          break;
        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
          /* Held by a user: mark so the release path discards it. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
          break;
        default:
          rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_STATE_11);
      }
    }

    if (cur->avl.left != NULL)
    {
      /* Left */
      ++prev;
      *prev = cur;
      cur = cur->avl.left;
    }
    else if (cur->avl.right != NULL)
    {
      /* Right */
      ++prev;
      *prev = cur;
      cur = cur->avl.right;
    }
    else
    {
      while (*prev != NULL
             && (cur == (*prev)->avl.right || (*prev)->avl.right == NULL))
      {
        /* Up */
        cur = *prev;
        --prev;
      }
      if (*prev != NULL)
        /* Right */
        cur = (*prev)->avl.right;
      else
        /* Finished */
        cur = NULL;
    }
  }
}
2872
/**
 * Purge all buffers of a device. The caller must hold the cache lock. The
 * read ahead state is reset first so no new read ahead is started for the
 * device while its buffers are being gathered and discarded.
 */
static void
rtems_bdbuf_do_purge_dev (rtems_disk_device *dd)
{
  rtems_chain_control purge_list;

  rtems_chain_initialize_empty (&purge_list);
  rtems_bdbuf_read_ahead_reset (dd);
  rtems_bdbuf_gather_for_purge (&purge_list, dd);
  rtems_bdbuf_purge_list (&purge_list);
}
2883
/**
 * Purge all buffers of a device. Public entry point: takes the cache lock
 * around the actual purge.
 */
void
rtems_bdbuf_purge_dev (rtems_disk_device *dd)
{
  rtems_bdbuf_lock_cache ();
  rtems_bdbuf_do_purge_dev (dd);
  rtems_bdbuf_unlock_cache ();
}
2891
2892rtems_status_code
2893rtems_bdbuf_set_block_size (rtems_disk_device *dd,
2894                            uint32_t           block_size,
2895                            bool               sync)
2896{
2897  rtems_status_code sc = RTEMS_SUCCESSFUL;
2898
2899  /*
2900   * We do not care about the synchronization status since we will purge the
2901   * device later.
2902   */
2903  if (sync)
2904    (void) rtems_bdbuf_syncdev (dd);
2905
2906  rtems_bdbuf_lock_cache ();
2907
2908  if (block_size > 0)
2909  {
2910    size_t bds_per_group = rtems_bdbuf_bds_per_group (block_size);
2911
2912    if (bds_per_group != 0)
2913    {
2914      int block_to_media_block_shift = 0;
2915      uint32_t media_blocks_per_block = block_size / dd->media_block_size;
2916      uint32_t one = 1;
2917
2918      while ((one << block_to_media_block_shift) < media_blocks_per_block)
2919      {
2920        ++block_to_media_block_shift;
2921      }
2922
2923      if ((dd->media_block_size << block_to_media_block_shift) != block_size)
2924        block_to_media_block_shift = -1;
2925
2926      dd->block_size = block_size;
2927      dd->block_count = dd->size / media_blocks_per_block;
2928      dd->media_blocks_per_block = media_blocks_per_block;
2929      dd->block_to_media_block_shift = block_to_media_block_shift;
2930      dd->bds_per_group = bds_per_group;
2931
2932      rtems_bdbuf_do_purge_dev (dd);
2933    }
2934    else
2935    {
2936      sc = RTEMS_INVALID_NUMBER;
2937    }
2938  }
2939  else
2940  {
2941    sc = RTEMS_INVALID_NUMBER;
2942  }
2943
2944  rtems_bdbuf_unlock_cache ();
2945
2946  return sc;
2947}
2948
/**
 * Body of the read ahead task. Waits to be woken, then services every device
 * queued on the read ahead chain: computes the transfer window for the
 * device and issues the read request while holding the cache lock.
 */
static rtems_task
rtems_bdbuf_read_ahead_task (rtems_task_argument arg)
{
  rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;

  while (bdbuf_cache.read_ahead_enabled)
  {
    rtems_chain_node *node;

    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
    rtems_bdbuf_lock_cache ();

    while ((node = rtems_chain_get_unprotected (chain)) != NULL)
    {
      rtems_disk_device *dd =
        RTEMS_CONTAINER_OF (node, rtems_disk_device, read_ahead.node);
      rtems_blkdev_bnum block = dd->read_ahead.next;
      rtems_blkdev_bnum media_block = 0;
      rtems_status_code sc =
        rtems_bdbuf_get_media_block (dd, block, &media_block);

      rtems_chain_set_off_chain (&dd->read_ahead.node);

      if (sc == RTEMS_SUCCESSFUL)
      {
        rtems_bdbuf_buffer *bd =
          rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

        /* bd is NULL when the block is already cached; nothing to read. */
        if (bd != NULL)
        {
          uint32_t transfer_count = dd->read_ahead.nr_blocks;
          uint32_t blocks_until_end_of_disk = dd->block_count - block;
          uint32_t max_transfer_count = bdbuf_config.max_read_ahead_blocks;

          if (transfer_count == RTEMS_DISK_READ_AHEAD_SIZE_AUTO) {
            /* Automatic sizing: read up to the configured maximum and set
             * the next trigger half way into the window. */
            transfer_count = blocks_until_end_of_disk;

            if (transfer_count >= max_transfer_count)
            {
              transfer_count = max_transfer_count;
              dd->read_ahead.trigger = block + transfer_count / 2;
              dd->read_ahead.next = block + transfer_count;
            }
            else
            {
              dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
            }
          } else {
            /* Explicit peek request: clamp to the end of the disk and the
             * configured maximum. */
            if (transfer_count > blocks_until_end_of_disk) {
              transfer_count = blocks_until_end_of_disk;
            }

            if (transfer_count > max_transfer_count) {
              transfer_count = max_transfer_count;
            }

            ++dd->stats.read_ahead_peeks;
          }

          ++dd->stats.read_ahead_transfers;
          rtems_bdbuf_execute_read_request (dd, bd, transfer_count);
        }
      }
      else
      {
        dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
      }
    }

    rtems_bdbuf_unlock_cache ();
  }

  rtems_task_exit();
}
3023
/**
 * Copy the device's I/O statistics into the caller's buffer. The cache lock
 * guarantees a consistent snapshot.
 */
void rtems_bdbuf_get_device_stats (const rtems_disk_device *dd,
                                   rtems_blkdev_stats      *stats)
{
  rtems_bdbuf_lock_cache ();
  *stats = dd->stats;
  rtems_bdbuf_unlock_cache ();
}
3031
/**
 * Reset the device's I/O statistics to zero under the cache lock.
 */
void rtems_bdbuf_reset_device_stats (rtems_disk_device *dd)
{
  rtems_bdbuf_lock_cache ();
  memset (&dd->stats, 0, sizeof(dd->stats));
  rtems_bdbuf_unlock_cache ();
}
Note: See TracBrowser for help on using the repository browser.