source: rtems/cpukit/libblock/src/bdbuf.c @ 606ed52

4.11
Last change on this file since 606ed52 was 606ed52, checked in by Ralf Kirchner <ralf.kirchner@…>, on May 23, 2014 at 3:09:22 PM

libblock: Use pthread_once() for initialization

Enabling and disabling preemption as done for single core will not work
for SMP. In the bdbuf initialization preemption handling can be avoided
in general by using pthread_once().

  • Property mode set to 100644
File size: 83.4 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (c) 2009-2012 embedded brains GmbH.
23 */
24
25/**
26 * Set to 1 to enable debug tracing.
27 */
28#define RTEMS_BDBUF_TRACE 0
29
30#if HAVE_CONFIG_H
31#include "config.h"
32#endif
33#include <limits.h>
34#include <errno.h>
35#include <stdio.h>
36#include <string.h>
37#include <inttypes.h>
38#include <pthread.h>
39
40#include <rtems.h>
41#include <rtems/error.h>
42#include <rtems/malloc.h>
43
44#include "rtems/bdbuf.h"
45
46#define BDBUF_INVALID_DEV NULL
47
48/*
49 * Simpler label for this file.
50 */
51#define bdbuf_config rtems_bdbuf_configuration
52
53/**
 * A swapout transfer transaction data. This data is passed to a worker thread
55 * to handle the write phase of the transfer.
56 */
57typedef struct rtems_bdbuf_swapout_transfer
58{
59  rtems_chain_control   bds;         /**< The transfer list of BDs. */
60  rtems_disk_device    *dd;          /**< The device the transfer is for. */
61  bool                  syncing;     /**< The data is a sync'ing. */
62  rtems_blkdev_request  write_req;   /**< The write request. */
63} rtems_bdbuf_swapout_transfer;
64
65/**
66 * Swapout worker thread. These are available to take processing from the
67 * main swapout thread and handle the I/O operation.
68 */
69typedef struct rtems_bdbuf_swapout_worker
70{
71  rtems_chain_node             link;     /**< The threads sit on a chain when
72                                          * idle. */
73  rtems_id                     id;       /**< The id of the task so we can wake
74                                          * it. */
75  bool                         enabled;  /**< The worker is enabled. */
76  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
77                                          * thread. */
78} rtems_bdbuf_swapout_worker;
79
80/**
81 * Buffer waiters synchronization.
82 */
83typedef struct rtems_bdbuf_waiters {
84  unsigned count;
85  rtems_id sema;
86} rtems_bdbuf_waiters;
87
88/**
89 * The BD buffer cache.
90 */
91typedef struct rtems_bdbuf_cache
92{
93  rtems_id            swapout;           /**< Swapout task ID */
94  bool                swapout_enabled;   /**< Swapout is only running if
95                                          * enabled. Set to false to kill the
96                                          * swap out task. It deletes itself. */
97  rtems_chain_control swapout_free_workers; /**< The work threads for the swapout
98                                             * task. */
99
100  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
101                                          * descriptors. */
102  void*               buffers;           /**< The buffer's memory. */
103  size_t              buffer_min_count;  /**< Number of minimum size buffers
104                                          * that fit the buffer memory. */
105  size_t              max_bds_per_group; /**< The number of BDs of minimum
106                                          * buffer size that fit in a group. */
107  uint32_t            flags;             /**< Configuration flags. */
108
109  rtems_id            lock;              /**< The cache lock. It locks all
110                                          * cache data, BD and lists. */
111  rtems_id            sync_lock;         /**< Sync calls block writes. */
112  bool                sync_active;       /**< True if a sync is active. */
113  rtems_id            sync_requester;    /**< The sync requester. */
114  rtems_disk_device  *sync_device;       /**< The device to sync and
115                                          * BDBUF_INVALID_DEV not a device
116                                          * sync. */
117
118  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
119                                          * root. There is only one. */
120  rtems_chain_control lru;               /**< Least recently used list */
121  rtems_chain_control modified;          /**< Modified buffers list */
122  rtems_chain_control sync;              /**< Buffers to sync list */
123
124  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
125                                          * ACCESS_CACHED, ACCESS_MODIFIED or
126                                          * ACCESS_EMPTY
127                                          * state. */
128  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
129                                          * state. */
130  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
131                                          * available. */
132
133  rtems_bdbuf_swapout_transfer *swapout_transfer;
134  rtems_bdbuf_swapout_worker *swapout_workers;
135
136  size_t              group_count;       /**< The number of groups. */
137  rtems_bdbuf_group*  groups;            /**< The groups. */
138  rtems_id            read_ahead_task;   /**< Read-ahead task */
139  rtems_chain_control read_ahead_chain;  /**< Read-ahead request chain */
140  bool                read_ahead_enabled; /**< Read-ahead enabled */
141  rtems_status_code   init_status;       /**< The initialization status */
142} rtems_bdbuf_cache;
143
144typedef enum {
145  RTEMS_BDBUF_FATAL_CACHE_LOCK,
146  RTEMS_BDBUF_FATAL_CACHE_UNLOCK,
147  RTEMS_BDBUF_FATAL_CACHE_WAIT_2,
148  RTEMS_BDBUF_FATAL_CACHE_WAIT_TO,
149  RTEMS_BDBUF_FATAL_CACHE_WAKE,
150  RTEMS_BDBUF_FATAL_PREEMPT_DIS,
151  RTEMS_BDBUF_FATAL_PREEMPT_RST,
152  RTEMS_BDBUF_FATAL_RA_WAKE_UP,
153  RTEMS_BDBUF_FATAL_RECYCLE,
154  RTEMS_BDBUF_FATAL_SO_WAKE_1,
155  RTEMS_BDBUF_FATAL_SO_WAKE_2,
156  RTEMS_BDBUF_FATAL_STATE_0,
157  RTEMS_BDBUF_FATAL_STATE_2,
158  RTEMS_BDBUF_FATAL_STATE_4,
159  RTEMS_BDBUF_FATAL_STATE_5,
160  RTEMS_BDBUF_FATAL_STATE_6,
161  RTEMS_BDBUF_FATAL_STATE_7,
162  RTEMS_BDBUF_FATAL_STATE_8,
163  RTEMS_BDBUF_FATAL_STATE_9,
164  RTEMS_BDBUF_FATAL_STATE_10,
165  RTEMS_BDBUF_FATAL_STATE_11,
166  RTEMS_BDBUF_FATAL_SWAPOUT_RE,
167  RTEMS_BDBUF_FATAL_SYNC_LOCK,
168  RTEMS_BDBUF_FATAL_SYNC_UNLOCK,
169  RTEMS_BDBUF_FATAL_TREE_RM,
170  RTEMS_BDBUF_FATAL_WAIT_EVNT,
171  RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT,
172  RTEMS_BDBUF_FATAL_ONCE
173} rtems_bdbuf_fatal_code;
174
175/**
176 * The events used in this code. These should be system events rather than
177 * application events.
178 */
179#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
180#define RTEMS_BDBUF_READ_AHEAD_WAKE_UP RTEMS_EVENT_1
181
182/**
183 * Lock semaphore attributes. This is used for locking type mutexes.
184 *
185 * @warning Priority inheritance is on.
186 */
187#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
188  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
189   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
190
191/**
192 * Waiter semaphore attributes.
193 *
194 * @warning Do not configure as inherit priority. If a driver is in the driver
195 *          initialisation table this locked semaphore will have the IDLE task
196 *          as the holder and a blocking task will raise the priority of the
 *          IDLE task which can cause unusual side effects.
198 */
199#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
200  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
201   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
202
203/**
204 * Waiter timeout. Set to non-zero to find some info on a waiter that is
205 * waiting too long.
206 */
207#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
208#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
209#define RTEMS_BDBUF_WAIT_TIMEOUT \
210  (RTEMS_MICROSECONDS_TO_TICKS (20000000))
211#endif
212
213static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
214
215static rtems_task rtems_bdbuf_read_ahead_task(rtems_task_argument arg);
216
217/**
218 * The Buffer Descriptor cache.
219 */
220static rtems_bdbuf_cache bdbuf_cache;
221
222static pthread_once_t rtems_bdbuf_once_state = PTHREAD_ONCE_INIT;
223
224#if RTEMS_BDBUF_TRACE
225/**
226 * If true output the trace message.
227 */
228bool rtems_bdbuf_tracer;
229
230/**
231 * Return the number of items on the list.
232 *
233 * @param list The chain control.
234 * @return uint32_t The number of items on the list.
235 */
236uint32_t
237rtems_bdbuf_list_count (rtems_chain_control* list)
238{
239  rtems_chain_node* node = rtems_chain_first (list);
240  uint32_t          count = 0;
241  while (!rtems_chain_is_tail (list, node))
242  {
243    count++;
244    node = rtems_chain_next (node);
245  }
246  return count;
247}
248
249/**
250 * Show the usage for the bdbuf cache.
251 */
252void
253rtems_bdbuf_show_usage (void)
254{
255  uint32_t group;
256  uint32_t total = 0;
257  uint32_t val;
258
259  for (group = 0; group < bdbuf_cache.group_count; group++)
260    total += bdbuf_cache.groups[group].users;
261  printf ("bdbuf:group users=%lu", total);
262  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
263  printf (", lru=%lu", val);
264  total = val;
265  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
266  printf (", mod=%lu", val);
267  total += val;
268  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
269  printf (", sync=%lu", val);
270  total += val;
271  printf (", total=%lu\n", total);
272}
273
274/**
275 * Show the users for a group of a bd.
276 *
277 * @param where A label to show the context of output.
278 * @param bd The bd to show the users of.
279 */
280void
281rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
282{
283  const char* states[] =
284    { "FR", "EM", "CH", "AC", "AM", "AE", "AP", "MD", "SY", "TR", "TP" };
285
286  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
287          where,
288          bd->block, states[bd->state],
289          bd->group - bdbuf_cache.groups,
290          bd - bdbuf_cache.bds,
291          bd->group->users,
292          bd->group->users > 8 ? "<<<<<<<" : "");
293}
294#else
295#define rtems_bdbuf_tracer (0)
296#define rtems_bdbuf_show_usage() ((void) 0)
297#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
298#endif
299
300/**
301 * The default maximum height of 32 allows for AVL trees having between
302 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
303 * change this compile-time constant as you wish.
304 */
305#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
306#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
307#endif
308
309static void
310rtems_bdbuf_fatal (rtems_fatal_code error)
311{
312  rtems_fatal (RTEMS_FATAL_SOURCE_BDBUF, error);
313}
314
315static void
316rtems_bdbuf_fatal_with_state (rtems_bdbuf_buf_state state,
317                              rtems_bdbuf_fatal_code error)
318{
319  rtems_bdbuf_fatal ((((uint32_t) state) << 16) | error);
320}
321
322/**
323 * Searches for the node with specified dd/block.
324 *
325 * @param root pointer to the root node of the AVL-Tree
326 * @param dd disk device search key
327 * @param block block search key
328 * @retval NULL node with the specified dd/block is not found
329 * @return pointer to the node with specified dd/block
330 */
331static rtems_bdbuf_buffer *
332rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
333                        const rtems_disk_device *dd,
334                        rtems_blkdev_bnum    block)
335{
336  rtems_bdbuf_buffer* p = *root;
337
338  while ((p != NULL) && ((p->dd != dd) || (p->block != block)))
339  {
340    if (((uintptr_t) p->dd < (uintptr_t) dd)
341        || ((p->dd == dd) && (p->block < block)))
342    {
343      p = p->avl.right;
344    }
345    else
346    {
347      p = p->avl.left;
348    }
349  }
350
351  return p;
352}
353
354/**
355 * Inserts the specified node to the AVl-Tree.
356 *
357 * @param root pointer to the root node of the AVL-Tree
358 * @param node Pointer to the node to add.
359 * @retval 0 The node added successfully
360 * @retval -1 An error occured
361 */
362static int
363rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
364                       rtems_bdbuf_buffer*  node)
365{
366  const rtems_disk_device *dd = node->dd;
367  rtems_blkdev_bnum block = node->block;
368
369  rtems_bdbuf_buffer*  p = *root;
370  rtems_bdbuf_buffer*  q;
371  rtems_bdbuf_buffer*  p1;
372  rtems_bdbuf_buffer*  p2;
373  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
374  rtems_bdbuf_buffer** buf_prev = buf_stack;
375
376  bool modified = false;
377
378  if (p == NULL)
379  {
380    *root = node;
381    node->avl.left = NULL;
382    node->avl.right = NULL;
383    node->avl.bal = 0;
384    return 0;
385  }
386
387  while (p != NULL)
388  {
389    *buf_prev++ = p;
390
391    if (((uintptr_t) p->dd < (uintptr_t) dd)
392        || ((p->dd == dd) && (p->block < block)))
393    {
394      p->avl.cache = 1;
395      q = p->avl.right;
396      if (q == NULL)
397      {
398        q = node;
399        p->avl.right = q = node;
400        break;
401      }
402    }
403    else if ((p->dd != dd) || (p->block != block))
404    {
405      p->avl.cache = -1;
406      q = p->avl.left;
407      if (q == NULL)
408      {
409        q = node;
410        p->avl.left = q;
411        break;
412      }
413    }
414    else
415    {
416      return -1;
417    }
418
419    p = q;
420  }
421
422  q->avl.left = q->avl.right = NULL;
423  q->avl.bal = 0;
424  modified = true;
425  buf_prev--;
426
427  while (modified)
428  {
429    if (p->avl.cache == -1)
430    {
431      switch (p->avl.bal)
432      {
433        case 1:
434          p->avl.bal = 0;
435          modified = false;
436          break;
437
438        case 0:
439          p->avl.bal = -1;
440          break;
441
442        case -1:
443          p1 = p->avl.left;
444          if (p1->avl.bal == -1) /* simple LL-turn */
445          {
446            p->avl.left = p1->avl.right;
447            p1->avl.right = p;
448            p->avl.bal = 0;
449            p = p1;
450          }
451          else /* double LR-turn */
452          {
453            p2 = p1->avl.right;
454            p1->avl.right = p2->avl.left;
455            p2->avl.left = p1;
456            p->avl.left = p2->avl.right;
457            p2->avl.right = p;
458            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
459            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
460            p = p2;
461          }
462          p->avl.bal = 0;
463          modified = false;
464          break;
465
466        default:
467          break;
468      }
469    }
470    else
471    {
472      switch (p->avl.bal)
473      {
474        case -1:
475          p->avl.bal = 0;
476          modified = false;
477          break;
478
479        case 0:
480          p->avl.bal = 1;
481          break;
482
483        case 1:
484          p1 = p->avl.right;
485          if (p1->avl.bal == 1) /* simple RR-turn */
486          {
487            p->avl.right = p1->avl.left;
488            p1->avl.left = p;
489            p->avl.bal = 0;
490            p = p1;
491          }
492          else /* double RL-turn */
493          {
494            p2 = p1->avl.left;
495            p1->avl.left = p2->avl.right;
496            p2->avl.right = p1;
497            p->avl.right = p2->avl.left;
498            p2->avl.left = p;
499            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
500            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
501            p = p2;
502          }
503          p->avl.bal = 0;
504          modified = false;
505          break;
506
507        default:
508          break;
509      }
510    }
511    q = p;
512    if (buf_prev > buf_stack)
513    {
514      p = *--buf_prev;
515
516      if (p->avl.cache == -1)
517      {
518        p->avl.left = q;
519      }
520      else
521      {
522        p->avl.right = q;
523      }
524    }
525    else
526    {
527      *root = p;
528      break;
529    }
530  };
531
532  return 0;
533}
534
535
536/**
537 * Removes the node from the tree.
538 *
539 * @param root Pointer to pointer to the root node
540 * @param node Pointer to the node to remove
541 * @retval 0 Item removed
542 * @retval -1 No such item found
543 */
544static int
545rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
546                       const rtems_bdbuf_buffer* node)
547{
548  const rtems_disk_device *dd = node->dd;
549  rtems_blkdev_bnum block = node->block;
550
551  rtems_bdbuf_buffer*  p = *root;
552  rtems_bdbuf_buffer*  q;
553  rtems_bdbuf_buffer*  r;
554  rtems_bdbuf_buffer*  s;
555  rtems_bdbuf_buffer*  p1;
556  rtems_bdbuf_buffer*  p2;
557  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
558  rtems_bdbuf_buffer** buf_prev = buf_stack;
559
560  bool modified = false;
561
562  memset (buf_stack, 0, sizeof(buf_stack));
563
564  while (p != NULL)
565  {
566    *buf_prev++ = p;
567
568    if (((uintptr_t) p->dd < (uintptr_t) dd)
569        || ((p->dd == dd) && (p->block < block)))
570    {
571      p->avl.cache = 1;
572      p = p->avl.right;
573    }
574    else if ((p->dd != dd) || (p->block != block))
575    {
576      p->avl.cache = -1;
577      p = p->avl.left;
578    }
579    else
580    {
581      /* node found */
582      break;
583    }
584  }
585
586  if (p == NULL)
587  {
588    /* there is no such node */
589    return -1;
590  }
591
592  q = p;
593
594  buf_prev--;
595  if (buf_prev > buf_stack)
596  {
597    p = *(buf_prev - 1);
598  }
599  else
600  {
601    p = NULL;
602  }
603
604  /* at this moment q - is a node to delete, p is q's parent */
605  if (q->avl.right == NULL)
606  {
607    r = q->avl.left;
608    if (r != NULL)
609    {
610      r->avl.bal = 0;
611    }
612    q = r;
613  }
614  else
615  {
616    rtems_bdbuf_buffer **t;
617
618    r = q->avl.right;
619
620    if (r->avl.left == NULL)
621    {
622      r->avl.left = q->avl.left;
623      r->avl.bal = q->avl.bal;
624      r->avl.cache = 1;
625      *buf_prev++ = q = r;
626    }
627    else
628    {
629      t = buf_prev++;
630      s = r;
631
632      while (s->avl.left != NULL)
633      {
634        *buf_prev++ = r = s;
635        s = r->avl.left;
636        r->avl.cache = -1;
637      }
638
639      s->avl.left = q->avl.left;
640      r->avl.left = s->avl.right;
641      s->avl.right = q->avl.right;
642      s->avl.bal = q->avl.bal;
643      s->avl.cache = 1;
644
645      *t = q = s;
646    }
647  }
648
649  if (p != NULL)
650  {
651    if (p->avl.cache == -1)
652    {
653      p->avl.left = q;
654    }
655    else
656    {
657      p->avl.right = q;
658    }
659  }
660  else
661  {
662    *root = q;
663  }
664
665  modified = true;
666
667  while (modified)
668  {
669    if (buf_prev > buf_stack)
670    {
671      p = *--buf_prev;
672    }
673    else
674    {
675      break;
676    }
677
678    if (p->avl.cache == -1)
679    {
680      /* rebalance left branch */
681      switch (p->avl.bal)
682      {
683        case -1:
684          p->avl.bal = 0;
685          break;
686        case  0:
687          p->avl.bal = 1;
688          modified = false;
689          break;
690
691        case +1:
692          p1 = p->avl.right;
693
694          if (p1->avl.bal >= 0) /* simple RR-turn */
695          {
696            p->avl.right = p1->avl.left;
697            p1->avl.left = p;
698
699            if (p1->avl.bal == 0)
700            {
701              p1->avl.bal = -1;
702              modified = false;
703            }
704            else
705            {
706              p->avl.bal = 0;
707              p1->avl.bal = 0;
708            }
709            p = p1;
710          }
711          else /* double RL-turn */
712          {
713            p2 = p1->avl.left;
714
715            p1->avl.left = p2->avl.right;
716            p2->avl.right = p1;
717            p->avl.right = p2->avl.left;
718            p2->avl.left = p;
719
720            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
721            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;
722
723            p = p2;
724            p2->avl.bal = 0;
725          }
726          break;
727
728        default:
729          break;
730      }
731    }
732    else
733    {
734      /* rebalance right branch */
735      switch (p->avl.bal)
736      {
737        case +1:
738          p->avl.bal = 0;
739          break;
740
741        case  0:
742          p->avl.bal = -1;
743          modified = false;
744          break;
745
746        case -1:
747          p1 = p->avl.left;
748
749          if (p1->avl.bal <= 0) /* simple LL-turn */
750          {
751            p->avl.left = p1->avl.right;
752            p1->avl.right = p;
753            if (p1->avl.bal == 0)
754            {
755              p1->avl.bal = 1;
756              modified = false;
757            }
758            else
759            {
760              p->avl.bal = 0;
761              p1->avl.bal = 0;
762            }
763            p = p1;
764          }
765          else /* double LR-turn */
766          {
767            p2 = p1->avl.right;
768
769            p1->avl.right = p2->avl.left;
770            p2->avl.left = p1;
771            p->avl.left = p2->avl.right;
772            p2->avl.right = p;
773
774            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
775            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
776
777            p = p2;
778            p2->avl.bal = 0;
779          }
780          break;
781
782        default:
783          break;
784      }
785    }
786
787    if (buf_prev > buf_stack)
788    {
789      q = *(buf_prev - 1);
790
791      if (q->avl.cache == -1)
792      {
793        q->avl.left = p;
794      }
795      else
796      {
797        q->avl.right = p;
798      }
799    }
800    else
801    {
802      *root = p;
803      break;
804    }
805
806  }
807
808  return 0;
809}
810
811static void
812rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
813{
814  bd->state = state;
815}
816
817static rtems_blkdev_bnum
818rtems_bdbuf_media_block (const rtems_disk_device *dd, rtems_blkdev_bnum block)
819{
820  if (dd->block_to_media_block_shift >= 0)
821    return block << dd->block_to_media_block_shift;
822  else
823    /*
824     * Change the block number for the block size to the block number for the media
825     * block size. We have to use 64bit maths. There is no short cut here.
826     */
827    return (rtems_blkdev_bnum)
828      ((((uint64_t) block) * dd->block_size) / dd->media_block_size);
829}
830
831/**
832 * Lock the mutex. A single task can nest calls.
833 *
834 * @param lock The mutex to lock.
835 * @param fatal_error_code The error code if the call fails.
836 */
837static void
838rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
839{
840  rtems_status_code sc = rtems_semaphore_obtain (lock,
841                                                 RTEMS_WAIT,
842                                                 RTEMS_NO_TIMEOUT);
843  if (sc != RTEMS_SUCCESSFUL)
844    rtems_bdbuf_fatal (fatal_error_code);
845}
846
847/**
848 * Unlock the mutex.
849 *
850 * @param lock The mutex to unlock.
851 * @param fatal_error_code The error code if the call fails.
852 */
853static void
854rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
855{
856  rtems_status_code sc = rtems_semaphore_release (lock);
857  if (sc != RTEMS_SUCCESSFUL)
858    rtems_bdbuf_fatal (fatal_error_code);
859}
860
861/**
862 * Lock the cache. A single task can nest calls.
863 */
864static void
865rtems_bdbuf_lock_cache (void)
866{
867  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BDBUF_FATAL_CACHE_LOCK);
868}
869
870/**
871 * Unlock the cache.
872 */
873static void
874rtems_bdbuf_unlock_cache (void)
875{
876  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BDBUF_FATAL_CACHE_UNLOCK);
877}
878
879/**
880 * Lock the cache's sync. A single task can nest calls.
881 */
882static void
883rtems_bdbuf_lock_sync (void)
884{
885  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BDBUF_FATAL_SYNC_LOCK);
886}
887
888/**
889 * Unlock the cache's sync lock. Any blocked writers are woken.
890 */
891static void
892rtems_bdbuf_unlock_sync (void)
893{
894  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
895                      RTEMS_BDBUF_FATAL_SYNC_UNLOCK);
896}
897
898static void
899rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
900{
901  ++bd->group->users;
902}
903
904static void
905rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
906{
907  --bd->group->users;
908}
909
910static rtems_mode
911rtems_bdbuf_disable_preemption (void)
912{
913  rtems_status_code sc = RTEMS_SUCCESSFUL;
914  rtems_mode prev_mode = 0;
915
916  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
917  if (sc != RTEMS_SUCCESSFUL)
918    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_PREEMPT_DIS);
919
920  return prev_mode;
921}
922
923static void
924rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
925{
926  rtems_status_code sc = RTEMS_SUCCESSFUL;
927
928  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
929  if (sc != RTEMS_SUCCESSFUL)
930    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_PREEMPT_RST);
931}
932
933/**
934 * Wait until woken. Semaphores are used so a number of tasks can wait and can
935 * be woken at once. Task events would require we maintain a list of tasks to
936 * be woken and this would require storage and we do not know the number of
937 * tasks that could be waiting.
938 *
939 * While we have the cache locked we can try and claim the semaphore and
940 * therefore know when we release the lock to the cache we will block until the
941 * semaphore is released. This may even happen before we get to block.
942 *
943 * A counter is used to save the release call when no one is waiting.
944 *
945 * The function assumes the cache is locked on entry and it will be locked on
946 * exit.
947 */
948static void
949rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
950{
951  rtems_status_code sc;
952  rtems_mode        prev_mode;
953
954  /*
955   * Indicate we are waiting.
956   */
957  ++waiters->count;
958
959  /*
960   * Disable preemption then unlock the cache and block.  There is no POSIX
961   * condition variable in the core API so this is a work around.
962   *
963   * The issue is a task could preempt after the cache is unlocked because it is
964   * blocking or just hits that window, and before this task has blocked on the
965   * semaphore. If the preempting task flushes the queue this task will not see
966   * the flush and may block for ever or until another transaction flushes this
967   * semaphore.
968   */
969  prev_mode = rtems_bdbuf_disable_preemption ();
970
971  /*
972   * Unlock the cache, wait, and lock the cache when we return.
973   */
974  rtems_bdbuf_unlock_cache ();
975
976  sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);
977
978  if (sc == RTEMS_TIMEOUT)
979    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CACHE_WAIT_TO);
980
981  if (sc != RTEMS_UNSATISFIED)
982    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CACHE_WAIT_2);
983
984  rtems_bdbuf_lock_cache ();
985
986  rtems_bdbuf_restore_preemption (prev_mode);
987
988  --waiters->count;
989}
990
991static void
992rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
993{
994  rtems_bdbuf_group_obtain (bd);
995  ++bd->waiters;
996  rtems_bdbuf_anonymous_wait (waiters);
997  --bd->waiters;
998  rtems_bdbuf_group_release (bd);
999}
1000
1001/**
1002 * Wake a blocked resource. The resource has a counter that lets us know if
1003 * there are any waiters.
1004 */
1005static void
1006rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters)
1007{
1008  rtems_status_code sc = RTEMS_SUCCESSFUL;
1009
1010  if (waiters->count > 0)
1011  {
1012    sc = rtems_semaphore_flush (waiters->sema);
1013    if (sc != RTEMS_SUCCESSFUL)
1014      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CACHE_WAKE);
1015  }
1016}
1017
1018static void
1019rtems_bdbuf_wake_swapper (void)
1020{
1021  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1022                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1023  if (sc != RTEMS_SUCCESSFUL)
1024    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_1);
1025}
1026
1027static bool
1028rtems_bdbuf_has_buffer_waiters (void)
1029{
1030  return bdbuf_cache.buffer_waiters.count;
1031}
1032
1033static void
1034rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
1035{
1036  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1037    rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_TREE_RM);
1038}
1039
1040static void
1041rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
1042{
1043  switch (bd->state)
1044  {
1045    case RTEMS_BDBUF_STATE_FREE:
1046      break;
1047    case RTEMS_BDBUF_STATE_CACHED:
1048      rtems_bdbuf_remove_from_tree (bd);
1049      break;
1050    default:
1051      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_10);
1052  }
1053
1054  rtems_chain_extract_unprotected (&bd->link);
1055}
1056
1057static void
1058rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
1059{
1060  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
1061  rtems_chain_prepend_unprotected (&bdbuf_cache.lru, &bd->link);
1062}
1063
1064static void
1065rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
1066{
1067  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
1068}
1069
1070static void
1071rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
1072{
1073  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
1074  rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
1075}
1076
1077static void
1078rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
1079{
1080  rtems_bdbuf_make_empty (bd);
1081
1082  if (bd->waiters == 0)
1083  {
1084    rtems_bdbuf_remove_from_tree (bd);
1085    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1086  }
1087}
1088
/**
 * Move a buffer that was modified by the user onto the modified list after
 * the access has finished.  The cache must be locked on entry and is locked
 * on exit, but may be released temporarily to honour an active sync.
 */
static void
rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
{
  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dd)
  {
    /* A sync of this device is in progress.  Release the cache lock and
     * block on the sync lock so the modified buffer is not added behind the
     * back of the sync operation.  The sync lock is released immediately
     * because only the rendezvous with the sync is needed. */
    rtems_bdbuf_unlock_cache ();

    /*
     * Wait for the sync lock.
     */
    rtems_bdbuf_lock_sync ();

    rtems_bdbuf_unlock_sync ();
    rtems_bdbuf_lock_cache ();
  }

  /*
   * Only the first modified release sets the timer and any further user
   * accesses do not change the timer value which should move down. This
   * assumes the user's hold of the buffer is much less than the time on the
   * modified list. Resetting the timer on each access could result in a
   * buffer never getting to 0 and never being forced onto disk. This raises a
   * difficult question. Is a snapshot of a block that is changing better than
   * nothing being written? We have tended to think we should hold changes for
   * only a specific period of time even if still changing and get onto disk
   * and let the file system try and recover this position if it can.
   */
  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
    bd->hold_timer = bdbuf_config.swap_block_hold;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
  rtems_chain_append_unprotected (&bdbuf_cache.modified, &bd->link);

  /* Wake tasks waiting on this buffer first; otherwise, if tasks are short
   * of buffers, nudge the swapper to write modified buffers out. */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else if (rtems_bdbuf_has_buffer_waiters ())
    rtems_bdbuf_wake_swapper ();
}
1128
1129static void
1130rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
1131{
1132  rtems_bdbuf_group_release (bd);
1133  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
1134
1135  if (bd->waiters)
1136    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1137  else
1138    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1139}
1140
1141/**
1142 * Compute the number of BDs per group for a given buffer size.
1143 *
1144 * @param size The buffer size. It can be any size and we scale up.
1145 */
1146static size_t
1147rtems_bdbuf_bds_per_group (size_t size)
1148{
1149  size_t bufs_per_size;
1150  size_t bds_per_size;
1151
1152  if (size > bdbuf_config.buffer_max)
1153    return 0;
1154
1155  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1156
1157  for (bds_per_size = 1;
1158       bds_per_size < bufs_per_size;
1159       bds_per_size <<= 1)
1160    ;
1161
1162  return bdbuf_cache.max_bds_per_group / bds_per_size;
1163}
1164
1165static void
1166rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
1167{
1168  rtems_bdbuf_group_release (bd);
1169  rtems_bdbuf_discard_buffer (bd);
1170
1171  if (bd->waiters)
1172    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1173  else
1174    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1175}
1176
1177/**
1178 * Reallocate a group. The BDs currently allocated in the group are removed
1179 * from the ALV tree and any lists then the new BD's are prepended to the ready
1180 * list of the cache.
1181 *
1182 * @param group The group to reallocate.
1183 * @param new_bds_per_group The new count of BDs per group.
1184 * @return A buffer of this group.
1185 */
1186static rtems_bdbuf_buffer *
1187rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1188{
1189  rtems_bdbuf_buffer* bd;
1190  size_t              b;
1191  size_t              bufs_per_bd;
1192
1193  if (rtems_bdbuf_tracer)
1194    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1195            group - bdbuf_cache.groups, group->bds_per_group,
1196            new_bds_per_group);
1197
1198  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1199
1200  for (b = 0, bd = group->bdbuf;
1201       b < group->bds_per_group;
1202       b++, bd += bufs_per_bd)
1203    rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1204
1205  group->bds_per_group = new_bds_per_group;
1206  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1207
1208  for (b = 1, bd = group->bdbuf + bufs_per_bd;
1209       b < group->bds_per_group;
1210       b++, bd += bufs_per_bd)
1211    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1212
1213  if (b > 1)
1214    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1215
1216  return group->bdbuf;
1217}
1218
1219static void
1220rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
1221                                rtems_disk_device  *dd,
1222                                rtems_blkdev_bnum   block)
1223{
1224  bd->dd        = dd ;
1225  bd->block     = block;
1226  bd->avl.left  = NULL;
1227  bd->avl.right = NULL;
1228  bd->waiters   = 0;
1229
1230  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1231    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RECYCLE);
1232
1233  rtems_bdbuf_make_empty (bd);
1234}
1235
1236static rtems_bdbuf_buffer *
1237rtems_bdbuf_get_buffer_from_lru_list (rtems_disk_device *dd,
1238                                      rtems_blkdev_bnum  block)
1239{
1240  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);
1241
1242  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
1243  {
1244    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
1245    rtems_bdbuf_buffer *empty_bd = NULL;
1246
1247    if (rtems_bdbuf_tracer)
1248      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
1249              bd - bdbuf_cache.bds,
1250              bd->group - bdbuf_cache.groups, bd->group->users,
1251              bd->group->bds_per_group, dd->bds_per_group);
1252
1253    /*
1254     * If nobody waits for this BD, we may recycle it.
1255     */
1256    if (bd->waiters == 0)
1257    {
1258      if (bd->group->bds_per_group == dd->bds_per_group)
1259      {
1260        rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1261
1262        empty_bd = bd;
1263      }
1264      else if (bd->group->users == 0)
1265        empty_bd = rtems_bdbuf_group_realloc (bd->group, dd->bds_per_group);
1266    }
1267
1268    if (empty_bd != NULL)
1269    {
1270      rtems_bdbuf_setup_empty_buffer (empty_bd, dd, block);
1271
1272      return empty_bd;
1273    }
1274
1275    node = rtems_chain_next (node);
1276  }
1277
1278  return NULL;
1279}
1280
1281static rtems_status_code
1282rtems_bdbuf_create_task(
1283  rtems_name name,
1284  rtems_task_priority priority,
1285  rtems_task_priority default_priority,
1286  rtems_id *id
1287)
1288{
1289  rtems_status_code sc;
1290  size_t stack_size = bdbuf_config.task_stack_size ?
1291    bdbuf_config.task_stack_size : RTEMS_BDBUF_TASK_STACK_SIZE_DEFAULT;
1292
1293  priority = priority != 0 ? priority : default_priority;
1294
1295  sc = rtems_task_create (name,
1296                          priority,
1297                          stack_size,
1298                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1299                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1300                          id);
1301
1302  return sc;
1303}
1304
1305static rtems_bdbuf_swapout_transfer*
1306rtems_bdbuf_swapout_transfer_alloc (void)
1307{
1308  /*
1309   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
1310   * I am disappointment at finding code like this in RTEMS. The request should
1311   * have been a rtems_chain_control. Simple, fast and less storage as the node
1312   * is already part of the buffer structure.
1313   */
1314  size_t transfer_size = sizeof (rtems_bdbuf_swapout_transfer)
1315    + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
1316  return calloc (1, transfer_size);
1317}
1318
1319static void
1320rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status);
1321
1322static void
1323rtems_bdbuf_swapout_transfer_init (rtems_bdbuf_swapout_transfer* transfer,
1324                                   rtems_id id)
1325{
1326  rtems_chain_initialize_empty (&transfer->bds);
1327  transfer->dd = BDBUF_INVALID_DEV;
1328  transfer->syncing = false;
1329  transfer->write_req.req = RTEMS_BLKDEV_REQ_WRITE;
1330  transfer->write_req.done = rtems_bdbuf_transfer_done;
1331  transfer->write_req.io_task = id;
1332}
1333
1334static size_t
1335rtems_bdbuf_swapout_worker_size (void)
1336{
1337  return sizeof (rtems_bdbuf_swapout_worker)
1338    + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
1339}
1340
1341static rtems_task
1342rtems_bdbuf_swapout_worker_task (rtems_task_argument arg);
1343
/**
 * Create and start the configured number of swapout worker tasks.  All
 * worker control structures are carved out of a single zero-initialized
 * allocation.  Creation stops at the first failure and its status is
 * returned; already created workers are left for the caller to clean up.
 */
static rtems_status_code
rtems_bdbuf_swapout_workers_create (void)
{
  rtems_status_code  sc;
  size_t             w;
  size_t             worker_size;
  char              *worker_current;

  worker_size = rtems_bdbuf_swapout_worker_size ();
  worker_current = calloc (1, bdbuf_config.swapout_workers * worker_size);
  sc = worker_current != NULL ? RTEMS_SUCCESSFUL : RTEMS_NO_MEMORY;

  bdbuf_cache.swapout_workers = (rtems_bdbuf_swapout_worker *) worker_current;

  for (w = 0;
       sc == RTEMS_SUCCESSFUL && w < bdbuf_config.swapout_workers;
       w++, worker_current += worker_size)
  {
    rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;

    /* Workers are named BDoa, BDob, ... in creation order. */
    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'D', 'o', 'a' + w),
                                  bdbuf_config.swapout_worker_priority,
                                  RTEMS_BDBUF_SWAPOUT_WORKER_TASK_PRIORITY_DEFAULT,
                                  &worker->id);
    if (sc == RTEMS_SUCCESSFUL)
    {
      rtems_bdbuf_swapout_transfer_init (&worker->transfer, worker->id);

      /* Enable the worker and make it available before it starts running. */
      rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);
      worker->enabled = true;

      sc = rtems_task_start (worker->id,
                             rtems_bdbuf_swapout_worker_task,
                             (rtems_task_argument) worker);
    }
  }

  return sc;
}
1383
1384static size_t
1385rtems_bdbuf_read_request_size (uint32_t transfer_count)
1386{
1387  return sizeof (rtems_blkdev_request)
1388    + sizeof (rtems_blkdev_sg_buffer) * transfer_count;
1389}
1390
/**
 * Initialise the cache: validate the configuration, create the cache and
 * sync locks and the waiter semaphores, allocate the buffer descriptors,
 * groups and aligned buffer memory, and create and start the swapout,
 * swapout worker and read-ahead tasks.  Executed exactly once via
 * pthread_once().
 *
 * @retval RTEMS_SUCCESSFUL Cache is ready for use.
 * @retval RTEMS_CALLED_FROM_ISR Invoked from interrupt context.
 * @retval RTEMS_INVALID_NUMBER Invalid configuration table values.
 * @retval RTEMS_UNSATISFIED Allocation or object creation failed.
 */
static rtems_status_code
rtems_bdbuf_do_init (void)
{
  rtems_bdbuf_group*  group;
  rtems_bdbuf_buffer* bd;
  uint8_t*            buffer;
  size_t              b;
  size_t              cache_aligment;
  rtems_status_code   sc;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:init\n");

  if (rtems_interrupt_is_in_progress())
    return RTEMS_CALLED_FROM_ISR;

  /*
   * Check the configuration table values.
   */

  /* The maximum buffer size must be a multiple of the minimum buffer size
   * so groups can be divided evenly. */
  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
    return RTEMS_INVALID_NUMBER;

  /* Read requests are built on the stack; reject configurations whose
   * read-ahead request would be too large for it. */
  if (rtems_bdbuf_read_request_size (bdbuf_config.max_read_ahead_blocks)
      > RTEMS_MINIMUM_STACK_SIZE / 8U)
    return RTEMS_INVALID_NUMBER;

  /*
   * For unspecified cache alignments we use the CPU alignment.
   */
  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
  if (cache_aligment <= 0)
    cache_aligment = CPU_ALIGNMENT;

  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;

  rtems_chain_initialize_empty (&bdbuf_cache.swapout_free_workers);
  rtems_chain_initialize_empty (&bdbuf_cache.lru);
  rtems_chain_initialize_empty (&bdbuf_cache.modified);
  rtems_chain_initialize_empty (&bdbuf_cache.sync);
  rtems_chain_initialize_empty (&bdbuf_cache.read_ahead_chain);

  /*
   * Create the locks for the cache.
   */
  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
                               &bdbuf_cache.lock);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  /* Hold the cache lock while the remaining state is built up. */
  rtems_bdbuf_lock_cache ();

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
                               &bdbuf_cache.sync_lock);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.access_waiters.sema);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.transfer_waiters.sema);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'b'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.buffer_waiters.sema);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  /*
   * Compute the various number of elements in the cache.
   */
  bdbuf_cache.buffer_min_count =
    bdbuf_config.size / bdbuf_config.buffer_min;
  bdbuf_cache.max_bds_per_group =
    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
  bdbuf_cache.group_count =
    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;

  /*
   * Allocate the memory for the buffer descriptors.
   */
  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
                            bdbuf_cache.buffer_min_count);
  if (!bdbuf_cache.bds)
    goto error;

  /*
   * Allocate the memory for the buffer groups.
   */
  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
                               bdbuf_cache.group_count);
  if (!bdbuf_cache.groups)
    goto error;

  /*
   * Allocate memory for buffer memory. The buffer memory will be cache
   * aligned. It is possible to free the memory allocated by rtems_memalign()
   * with free(). Return 0 if allocated.
   */
  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
                      cache_aligment,
                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
    goto error;

  /*
   * The cache is empty after opening so we need to add all the buffers to it
   * and initialise the groups.
   */
  for (b = 0, group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
       b < bdbuf_cache.buffer_min_count;
       b++, bd++, buffer += bdbuf_config.buffer_min)
  {
    bd->dd    = BDBUF_INVALID_DEV;
    bd->group  = group;
    bd->buffer = buffer;

    rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);

    /* Advance to the next group after its last BD has been assigned. */
    if ((b % bdbuf_cache.max_bds_per_group) ==
        (bdbuf_cache.max_bds_per_group - 1))
      group++;
  }

  /* Give each group its initial geometry and its first BD. */
  for (b = 0,
         group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds;
       b < bdbuf_cache.group_count;
       b++,
         group++,
         bd += bdbuf_cache.max_bds_per_group)
  {
    group->bds_per_group = bdbuf_cache.max_bds_per_group;
    group->bdbuf = bd;
  }

  /*
   * Create and start swapout task.
   */

  bdbuf_cache.swapout_transfer = rtems_bdbuf_swapout_transfer_alloc ();
  if (!bdbuf_cache.swapout_transfer)
    goto error;

  bdbuf_cache.swapout_enabled = true;

  sc = rtems_bdbuf_create_task (rtems_build_name('B', 'S', 'W', 'P'),
                                bdbuf_config.swapout_priority,
                                RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
                                &bdbuf_cache.swapout);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  rtems_bdbuf_swapout_transfer_init (bdbuf_cache.swapout_transfer, bdbuf_cache.swapout);

  sc = rtems_task_start (bdbuf_cache.swapout,
                         rtems_bdbuf_swapout_task,
                         (rtems_task_argument) bdbuf_cache.swapout_transfer);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  if (bdbuf_config.swapout_workers > 0)
  {
    sc = rtems_bdbuf_swapout_workers_create ();
    if (sc != RTEMS_SUCCESSFUL)
      goto error;
  }

  /* The read-ahead task is only created when read ahead is configured. */
  if (bdbuf_config.max_read_ahead_blocks > 0)
  {
    bdbuf_cache.read_ahead_enabled = true;
    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'R', 'D', 'A'),
                                  bdbuf_config.read_ahead_priority,
                                  RTEMS_BDBUF_READ_AHEAD_TASK_PRIORITY_DEFAULT,
                                  &bdbuf_cache.read_ahead_task);
    if (sc != RTEMS_SUCCESSFUL)
      goto error;

    sc = rtems_task_start (bdbuf_cache.read_ahead_task,
                           rtems_bdbuf_read_ahead_task,
                           0);
    if (sc != RTEMS_SUCCESSFUL)
      goto error;
  }

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;

error:

  /*
   * Partial construction: tear down whatever was created.  Object ids and
   * pointers are zero-initialized, so unset resources are skipped and
   * free(NULL) is a no-op.
   */
  if (bdbuf_cache.read_ahead_task != 0)
    rtems_task_delete (bdbuf_cache.read_ahead_task);

  if (bdbuf_cache.swapout != 0)
    rtems_task_delete (bdbuf_cache.swapout);

  if (bdbuf_cache.swapout_workers)
  {
    char   *worker_current = (char *) bdbuf_cache.swapout_workers;
    size_t  worker_size = rtems_bdbuf_swapout_worker_size ();
    size_t  w;

    for (w = 0;
         w < bdbuf_config.swapout_workers;
         w++, worker_current += worker_size)
    {
      rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;

      if (worker->id != 0) {
        rtems_task_delete (worker->id);
      }
    }
  }

  free (bdbuf_cache.buffers);
  free (bdbuf_cache.groups);
  free (bdbuf_cache.bds);
  free (bdbuf_cache.swapout_transfer);
  free (bdbuf_cache.swapout_workers);

  rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema);
  rtems_semaphore_delete (bdbuf_cache.access_waiters.sema);
  rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema);
  rtems_semaphore_delete (bdbuf_cache.sync_lock);

  /* The cache lock was obtained right after its creation; release it
   * before deleting it. */
  if (bdbuf_cache.lock != 0)
  {
    rtems_bdbuf_unlock_cache ();
    rtems_semaphore_delete (bdbuf_cache.lock);
  }

  return RTEMS_UNSATISFIED;
}
1636
static void
rtems_bdbuf_init_once (void)
{
  /* Runs exactly once via pthread_once(); the result is stored so every
   * caller of rtems_bdbuf_init() observes the same status. */
  bdbuf_cache.init_status = rtems_bdbuf_do_init();
}
1642
1643rtems_status_code
1644rtems_bdbuf_init (void)
1645{
1646  int eno = pthread_once (&rtems_bdbuf_once_state, rtems_bdbuf_init_once);
1647
1648  if (eno != 0)
1649    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_ONCE);
1650
1651  return bdbuf_cache.init_status;
1652}
1653
1654static void
1655rtems_bdbuf_wait_for_event (rtems_event_set event)
1656{
1657  rtems_status_code sc = RTEMS_SUCCESSFUL;
1658  rtems_event_set   out = 0;
1659
1660  sc = rtems_event_receive (event,
1661                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1662                            RTEMS_NO_TIMEOUT,
1663                            &out);
1664
1665  if (sc != RTEMS_SUCCESSFUL || out != event)
1666    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_EVNT);
1667}
1668
1669static void
1670rtems_bdbuf_wait_for_transient_event (void)
1671{
1672  rtems_status_code sc = RTEMS_SUCCESSFUL;
1673
1674  sc = rtems_event_transient_receive (RTEMS_WAIT, RTEMS_NO_TIMEOUT);
1675  if (sc != RTEMS_SUCCESSFUL)
1676    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT);
1677}
1678
/**
 * Wait until the buffer may be accessed by the caller.  On return the BD is
 * in the CACHED, MODIFIED or EMPTY state and, for CACHED and MODIFIED, has
 * been removed from its list.  The cache must be locked; the waits below
 * release and reacquire it.
 */
static void
rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* A modified buffer holds a group reference; drop it. */
        rtems_bdbuf_group_release (bd);
        /* Fall through */
      case RTEMS_BDBUF_STATE_CACHED:
        /* Take the buffer off the LRU or modified list. */
        rtems_chain_extract_unprotected (&bd->link);
        /* Fall through */
      case RTEMS_BDBUF_STATE_EMPTY:
        return;
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        /* Another task is accessing the buffer; wait for it to finish. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        /* The buffer is part of an I/O transfer; wait for completion. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_7);
    }
  }
}
1710
1711static void
1712rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
1713{
1714  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
1715  rtems_chain_extract_unprotected (&bd->link);
1716  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
1717  rtems_bdbuf_wake_swapper ();
1718}
1719
1720/**
1721 * @brief Waits until the buffer is ready for recycling.
1722 *
1723 * @retval @c true Buffer is valid and may be recycled.
1724 * @retval @c false Buffer is invalid and has to searched again.
1725 */
1726static bool
1727rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
1728{
1729  while (true)
1730  {
1731    switch (bd->state)
1732    {
1733      case RTEMS_BDBUF_STATE_FREE:
1734        return true;
1735      case RTEMS_BDBUF_STATE_MODIFIED:
1736        rtems_bdbuf_request_sync_for_modified_buffer (bd);
1737        break;
1738      case RTEMS_BDBUF_STATE_CACHED:
1739      case RTEMS_BDBUF_STATE_EMPTY:
1740        if (bd->waiters == 0)
1741          return true;
1742        else
1743        {
1744          /*
1745           * It is essential that we wait here without a special wait count and
1746           * without the group in use.  Otherwise we could trigger a wait ping
1747           * pong with another recycle waiter.  The state of the buffer is
1748           * arbitrary afterwards.
1749           */
1750          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1751          return false;
1752        }
1753      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1754      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1755      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1756      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1757        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1758        break;
1759      case RTEMS_BDBUF_STATE_SYNC:
1760      case RTEMS_BDBUF_STATE_TRANSFER:
1761      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1762        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1763        break;
1764      default:
1765        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_8);
1766    }
1767  }
1768}
1769
/**
 * Wait until a sync or transfer of the buffer has completed, i.e. until the
 * buffer leaves the SYNC/TRANSFER states.  The cache must be locked; the
 * wait below releases and reacquires it.
 */
static void
rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        /* The sync or transfer is over. */
        return;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_9);
    }
  }
}
1795
1796static void
1797rtems_bdbuf_wait_for_buffer (void)
1798{
1799  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
1800    rtems_bdbuf_wake_swapper ();
1801
1802  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1803}
1804
/**
 * Queue the buffer for a synchronous write after an access, wake the
 * swapout task, and wait until the write has completed.  The cache must be
 * locked; the wait releases and reacquires it.
 */
static void
rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);

  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_wait_for_sync_done (bd);

  /*
   * We may have created a cached or empty buffer which may be recycled.
   */
  if (bd->waiters == 0
        && (bd->state == RTEMS_BDBUF_STATE_CACHED
          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
  {
    /* An empty buffer after sync is no longer useful: free it. */
    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
    {
      rtems_bdbuf_remove_from_tree (bd);
      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
    }
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
  }
}
1833
1834static rtems_bdbuf_buffer *
1835rtems_bdbuf_get_buffer_for_read_ahead (rtems_disk_device *dd,
1836                                       rtems_blkdev_bnum  block)
1837{
1838  rtems_bdbuf_buffer *bd = NULL;
1839
1840  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1841
1842  if (bd == NULL)
1843  {
1844    bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);
1845
1846    if (bd != NULL)
1847      rtems_bdbuf_group_obtain (bd);
1848  }
1849  else
1850    /*
1851     * The buffer is in the cache.  So it is already available or in use, and
1852     * thus no need for a read ahead.
1853     */
1854    bd = NULL;
1855
1856  return bd;
1857}
1858
/**
 * Get the buffer for (dd, block) ready for access: loop until a suitable BD
 * is found or created, wait until it is accessible, and obtain its group.
 * The cache must be locked; it may be released and reacquired while
 * waiting.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_for_access (rtems_disk_device *dd,
                                   rtems_blkdev_bnum  block)
{
  rtems_bdbuf_buffer *bd = NULL;

  do
  {
    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);

    if (bd != NULL)
    {
      /* The BD exists but its group buffer size does not match the device;
       * recycle it and search again. */
      if (bd->group->bds_per_group != dd->bds_per_group)
      {
        if (rtems_bdbuf_wait_for_recycle (bd))
        {
          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
        }
        bd = NULL;
      }
    }
    else
    {
      /* Not cached: try to take over an LRU buffer, or wait for one to
       * become available and retry. */
      bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);

      if (bd == NULL)
        rtems_bdbuf_wait_for_buffer ();
    }
  }
  while (bd == NULL);

  rtems_bdbuf_wait_for_access (bd);
  rtems_bdbuf_group_obtain (bd);

  return bd;
}
1897
1898static rtems_status_code
1899rtems_bdbuf_get_media_block (const rtems_disk_device *dd,
1900                             rtems_blkdev_bnum        block,
1901                             rtems_blkdev_bnum       *media_block_ptr)
1902{
1903  rtems_status_code sc = RTEMS_SUCCESSFUL;
1904
1905  if (block < dd->block_count)
1906  {
1907    /*
1908     * Compute the media block number. Drivers work with media block number not
1909     * the block number a BD may have as this depends on the block size set by
1910     * the user.
1911     */
1912    *media_block_ptr = rtems_bdbuf_media_block (dd, block) + dd->start;
1913  }
1914  else
1915  {
1916    sc = RTEMS_INVALID_ID;
1917  }
1918
1919  return sc;
1920}
1921
/**
 * Get a buffer for the block without reading its contents from the media.
 * On success the buffer is returned in one of the ACCESS states so the
 * caller has exclusive use of it.
 *
 * @param dd The disk device.
 * @param block The logical block number.
 * @param bd_ptr Receives the buffer, or NULL if the block is invalid.
 * @retval RTEMS_SUCCESSFUL Buffer obtained.
 * @retval RTEMS_INVALID_ID Block number out of range for the device.
 */
rtems_status_code
rtems_bdbuf_get (rtems_disk_device   *dd,
                 rtems_blkdev_bnum    block,
                 rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code   sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block;

  rtems_bdbuf_lock_cache ();

  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    /*
     * Print the block index relative to the physical disk.
     */
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);

    /* Enter the access state matching the buffer's current contents. */
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /*
         * To get a modified buffer could be considered a bug in the caller
         * because you should not be getting an already modified buffer but
         * user may have modified a byte in a block then decided to seek the
         * start and write the whole block and the file system will have no
         * record of this so just gets the block to fill.
         */
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_2);
        break;
    }

    if (rtems_bdbuf_tracer)
    {
      rtems_bdbuf_show_users ("get", bd);
      rtems_bdbuf_show_usage ();
    }
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
1981
1982/**
1983 * Call back handler called by the low level driver when the transfer has
1984 * completed. This function may be invoked from interrupt handler.
1985 *
1986 * @param arg Arbitrary argument specified in block device request
1987 *            structure (in this case - pointer to the appropriate
1988 *            block device request structure).
1989 * @param status I/O completion status
1990 */
1991static void
1992rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status)
1993{
1994  req->status = status;
1995
1996  rtems_event_transient_send (req->io_task);
1997}
1998
/**
 * Execute a prepared read or write request on the device and update the
 * cache state of all buffers referenced by the request afterwards.
 *
 * @param dd The disk device the request is directed to.
 * @param req The prepared transfer request.
 * @param cache_locked If @c true the cache is locked on entry and on exit.
 * @retval RTEMS_SUCCESSFUL Transfer succeeded.
 * @retval RTEMS_UNSATISFIED Driver reported an unsatisfied request.
 * @retval RTEMS_IO_ERROR Any other driver failure.
 */
static rtems_status_code
rtems_bdbuf_execute_transfer_request (rtems_disk_device    *dd,
                                      rtems_blkdev_request *req,
                                      bool                  cache_locked)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  uint32_t transfer_index = 0;
  bool wake_transfer_waiters = false;
  bool wake_buffer_waiters = false;

  /* The cache must not be held across the driver call. */
  if (cache_locked)
    rtems_bdbuf_unlock_cache ();

  /* The return value will be ignored for transfer requests */
  dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);

  /* Wait for transfer request completion */
  rtems_bdbuf_wait_for_transient_event ();
  sc = req->status;

  rtems_bdbuf_lock_cache ();

  /* Statistics */
  if (req->req == RTEMS_BLKDEV_REQ_READ)
  {
    dd->stats.read_blocks += req->bufnum;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.read_errors;
  }
  else
  {
    dd->stats.write_blocks += req->bufnum;
    ++dd->stats.write_transfers;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.write_errors;
  }

  /* Release every buffer of the request: successful transfers become
   * cached, failed or purged ones are discarded. */
  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
  {
    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
    bool waiters = bd->waiters;

    if (waiters)
      wake_transfer_waiters = true;
    else
      wake_buffer_waiters = true;

    rtems_bdbuf_group_release (bd);

    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
    else
      rtems_bdbuf_discard_buffer (bd);

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("transfer", bd);
  }

  if (wake_transfer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);

  if (wake_buffer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  /* Restore the lock state the caller expects. */
  if (!cache_locked)
    rtems_bdbuf_unlock_cache ();

  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
    return sc;
  else
    return RTEMS_IO_ERROR;
}
2071
/**
 * Build and execute a read request starting at the given buffer and
 * extending over up to @a transfer_count consecutive media blocks.  Blocks
 * beyond the first are read ahead only while free buffers are available.
 * The cache must be locked on entry and is locked on exit.
 */
static rtems_status_code
rtems_bdbuf_execute_read_request (rtems_disk_device  *dd,
                                  rtems_bdbuf_buffer *bd,
                                  uint32_t            transfer_count)
{
  rtems_blkdev_request *req = NULL;
  rtems_blkdev_bnum media_block = bd->block;
  uint32_t media_blocks_per_block = dd->media_blocks_per_block;
  uint32_t block_size = dd->block_size;
  uint32_t transfer_index = 1;

  /*
   * TODO: This type of request structure is wrong and should be removed.
   * The request lives on the stack; its size was validated against the
   * stack budget in rtems_bdbuf_do_init().
   */
#define bdbuf_alloc(size) __builtin_alloca (size)

  req = bdbuf_alloc (rtems_bdbuf_read_request_size (transfer_count));

  req->req = RTEMS_BLKDEV_REQ_READ;
  req->done = rtems_bdbuf_transfer_done;
  req->io_task = rtems_task_self ();
  req->bufnum = 0;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

  /* The first scatter/gather entry is the requested block itself. */
  req->bufs [0].user   = bd;
  req->bufs [0].block  = media_block;
  req->bufs [0].length = block_size;
  req->bufs [0].buffer = bd->buffer;

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_users ("read", bd);

  /* Append read-ahead blocks while buffers can be obtained. */
  while (transfer_index < transfer_count)
  {
    media_block += media_blocks_per_block;

    bd = rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

    if (bd == NULL)
      break;

    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

    req->bufs [transfer_index].user   = bd;
    req->bufs [transfer_index].block  = media_block;
    req->bufs [transfer_index].length = block_size;
    req->bufs [transfer_index].buffer = bd->buffer;

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("read", bd);

    ++transfer_index;
  }

  req->bufnum = transfer_index;

  return rtems_bdbuf_execute_transfer_request (dd, req, true);
}
2131
2132static bool
2133rtems_bdbuf_is_read_ahead_active (const rtems_disk_device *dd)
2134{
2135  return !rtems_chain_is_node_off_chain (&dd->read_ahead.node);
2136}
2137
2138static void
2139rtems_bdbuf_read_ahead_cancel (rtems_disk_device *dd)
2140{
2141  if (rtems_bdbuf_is_read_ahead_active (dd))
2142  {
2143    rtems_chain_extract_unprotected (&dd->read_ahead.node);
2144    rtems_chain_set_off_chain (&dd->read_ahead.node);
2145  }
2146}
2147
/**
 * Cancel any pending read ahead for this disk and disarm its trigger so no
 * new read ahead will start until the trigger is set again.
 */
static void
rtems_bdbuf_read_ahead_reset (rtems_disk_device *dd)
{
  rtems_bdbuf_read_ahead_cancel (dd);
  dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
}
2154
/**
 * Queue a read-ahead request for this disk if the accessed block hits the
 * armed trigger block.  Fires only when a read-ahead task exists and no
 * read ahead for this device is already in flight.
 */
static void
rtems_bdbuf_check_read_ahead_trigger (rtems_disk_device *dd,
                                      rtems_blkdev_bnum  block)
{
  if (bdbuf_cache.read_ahead_task != 0
      && dd->read_ahead.trigger == block
      && !rtems_bdbuf_is_read_ahead_active (dd))
  {
    rtems_status_code sc;
    rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;

    /*
     * Wake the read-ahead task only on the empty-to-non-empty transition of
     * the request chain; otherwise the task is still draining the chain and
     * will pick up the new entry anyway.
     */
    if (rtems_chain_is_empty (chain))
    {
      sc = rtems_event_send (bdbuf_cache.read_ahead_task,
                             RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
      if (sc != RTEMS_SUCCESSFUL)
        rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RA_WAKE_UP);
    }

    rtems_chain_append_unprotected (chain, &dd->read_ahead.node);
  }
}
2177
2178static void
2179rtems_bdbuf_set_read_ahead_trigger (rtems_disk_device *dd,
2180                                    rtems_blkdev_bnum  block)
2181{
2182  if (dd->read_ahead.trigger != block)
2183  {
2184    rtems_bdbuf_read_ahead_cancel (dd);
2185    dd->read_ahead.trigger = block + 1;
2186    dd->read_ahead.next = block + 2;
2187  }
2188}
2189
/**
 * Get the buffer for the requested logical block and read its contents from
 * the disk if it is not already in the cache.  On success the buffer is
 * returned in an access state and must be handed back via one of the release
 * functions (rtems_bdbuf_release(), rtems_bdbuf_release_modified() or
 * rtems_bdbuf_sync()).  On a read miss the read-ahead trigger is armed for
 * sequential access detection.
 *
 * @param dd The disk device.
 * @param block The logical block number.
 * @param bd_ptr Receives the buffer on success, NULL otherwise.
 *
 * @return RTEMS_SUCCESSFUL on success, otherwise the media-block translation
 *         or transfer error status.
 */
rtems_status_code
rtems_bdbuf_read (rtems_disk_device   *dd,
                  rtems_blkdev_bnum    block,
                  rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code     sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer   *bd = NULL;
  rtems_blkdev_bnum     media_block;

  rtems_bdbuf_lock_cache ();

  /* Translate the logical block number into a media block number. */
  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        /* Read hit: the cached data is valid. */
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* Read hit on a buffer with not yet written modifications. */
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        /* Read miss: fetch the single block from the disk. */
        ++dd->stats.read_misses;
        rtems_bdbuf_set_read_ahead_trigger (dd, block);
        sc = rtems_bdbuf_execute_read_request (dd, bd, 1);
        if (sc == RTEMS_SUCCESSFUL)
        {
          rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
          rtems_chain_extract_unprotected (&bd->link);
          rtems_bdbuf_group_obtain (bd);
        }
        else
        {
          /* The failed transfer already disposed of the buffer, so return
           * no buffer to the caller. */
          bd = NULL;
        }
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_4);
        break;
    }

    /* A hit on the armed trigger block queues the actual read ahead. */
    rtems_bdbuf_check_read_ahead_trigger (dd, block);
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
2248
/**
 * Validate a buffer passed to one of the release functions and lock the
 * cache.  The cache is locked only when RTEMS_SUCCESSFUL is returned; the
 * caller must unlock it in that case.
 *
 * @param bd The buffer to check; may be NULL.
 * @param kind Label used in the trace output.
 *
 * @retval RTEMS_SUCCESSFUL The buffer is valid and the cache is now locked.
 * @retval RTEMS_INVALID_ADDRESS @a bd is NULL.
 */
static rtems_status_code
rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
{
  if (bd == NULL)
    return RTEMS_INVALID_ADDRESS;
  if (rtems_bdbuf_tracer)
  {
    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
    rtems_bdbuf_show_users (kind, bd);
  }
  rtems_bdbuf_lock_cache();

  return RTEMS_SUCCESSFUL;
}
2263
/**
 * Release a buffer obtained for access without marking it modified.  The
 * buffer is moved to the LRU list (cached), to the modified list, or is
 * discarded, depending on the access state it was in.
 *
 * @param bd The buffer to release; NULL yields RTEMS_INVALID_ADDRESS.
 *
 * @retval RTEMS_SUCCESSFUL The buffer was released.
 * @retval RTEMS_INVALID_ADDRESS @a bd is NULL.
 */
rtems_status_code
rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;

  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      rtems_bdbuf_add_to_lru_list_after_access (bd);
      break;
    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
      /* Empty or purged buffers carry no valid data and are discarded. */
      rtems_bdbuf_discard_buffer_after_access (bd);
      break;
    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      /* The buffer was modified before this access; keep it queued for the
       * swapout task. */
      rtems_bdbuf_add_to_modified_list_after_access (bd);
      break;
    default:
      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_0);
      break;
  }

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_usage ();

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;
}
2297
/**
 * Release a buffer obtained for access and mark it modified so the swapout
 * task will write it to disk.  Purged buffers are discarded instead.
 *
 * @param bd The buffer to release; NULL yields RTEMS_INVALID_ADDRESS.
 *
 * @retval RTEMS_SUCCESSFUL The buffer was released.
 * @retval RTEMS_INVALID_ADDRESS @a bd is NULL.
 */
rtems_status_code
rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;

  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      rtems_bdbuf_add_to_modified_list_after_access (bd);
      break;
    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
      /* The device was purged while the buffer was accessed; drop it. */
      rtems_bdbuf_discard_buffer_after_access (bd);
      break;
    default:
      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_6);
      break;
  }

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_usage ();

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;
}
2329
/**
 * Release a buffer obtained for access and synchronize it to disk before
 * returning it to the cache.  Purged buffers are discarded instead.
 *
 * @param bd The buffer to synchronize; NULL yields RTEMS_INVALID_ADDRESS.
 *
 * @retval RTEMS_SUCCESSFUL The buffer was released.
 * @retval RTEMS_INVALID_ADDRESS @a bd is NULL.
 */
rtems_status_code
rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;

  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
  if (sc != RTEMS_SUCCESSFUL)
    return sc;

  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      rtems_bdbuf_sync_after_access (bd);
      break;
    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
      /* The device was purged while the buffer was accessed; drop it. */
      rtems_bdbuf_discard_buffer_after_access (bd);
      break;
    default:
      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_5);
      break;
  }

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_usage ();

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;
}
2361
/**
 * Synchronize all modified buffers of a disk device to the disk.  Blocks
 * until the swapout task signals that the sync is complete.
 *
 * @param dd The disk device to synchronize.
 *
 * @retval RTEMS_SUCCESSFUL Always; transfer errors are not reported here.
 */
rtems_status_code
rtems_bdbuf_syncdev (rtems_disk_device *dd)
{
  if (rtems_bdbuf_tracer)
    printf ("bdbuf:syncdev: %08x\n", (unsigned) dd->dev);

  /*
   * Take the sync lock before locking the cache. Once we have the sync lock we
   * can lock the cache. If another thread has the sync lock it will cause this
   * thread to block until it owns the sync lock then it can own the cache. The
   * sync lock can only be obtained with the cache unlocked.
   */
  rtems_bdbuf_lock_sync ();
  rtems_bdbuf_lock_cache ();

  /*
   * Set the cache to have a sync active for a specific device and let the swap
   * out task know the id of the requester to wake when done.
   *
   * The swap out task will negate the sync active flag when no more buffers
   * for the device are held on the "modified for sync" queues.
   */
  bdbuf_cache.sync_active    = true;
  bdbuf_cache.sync_requester = rtems_task_self ();
  bdbuf_cache.sync_device    = dd;

  /* Wake the swapout task and wait until it signals completion. */
  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_unlock_cache ();
  rtems_bdbuf_wait_for_transient_event ();
  rtems_bdbuf_unlock_sync ();

  return RTEMS_SUCCESSFUL;
}
2395
2396/**
2397 * Swapout transfer to the driver. The driver will break this I/O into groups
2398 * of consecutive write requests is multiple consecutive buffers are required
2399 * by the driver. The cache is not locked.
2400 *
2401 * @param transfer The transfer transaction.
2402 */
2403static void
2404rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2405{
2406  rtems_chain_node *node;
2407
2408  if (rtems_bdbuf_tracer)
2409    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dd->dev);
2410
2411  /*
2412   * If there are buffers to transfer to the media transfer them.
2413   */
2414  if (!rtems_chain_is_empty (&transfer->bds))
2415  {
2416    /*
2417     * The last block number used when the driver only supports
2418     * continuous blocks in a single request.
2419     */
2420    uint32_t last_block = 0;
2421
2422    rtems_disk_device *dd = transfer->dd;
2423    uint32_t media_blocks_per_block = dd->media_blocks_per_block;
2424    bool need_continuous_blocks =
2425      (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) != 0;
2426
2427    /*
2428     * Take as many buffers as configured and pass to the driver. Note, the
2429     * API to the drivers has an array of buffers and if a chain was passed
2430     * we could have just passed the list. If the driver API is updated it
2431     * should be possible to make this change with little effect in this
2432     * code. The array that is passed is broken in design and should be
2433     * removed. Merging members of a struct into the first member is
2434     * trouble waiting to happen.
2435     */
2436    transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
2437    transfer->write_req.bufnum = 0;
2438
2439    while ((node = rtems_chain_get_unprotected(&transfer->bds)) != NULL)
2440    {
2441      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2442      bool                write = false;
2443
2444      /*
2445       * If the device only accepts sequential buffers and this is not the
2446       * first buffer (the first is always sequential, and the buffer is not
2447       * sequential then put the buffer back on the transfer chain and write
2448       * the committed buffers.
2449       */
2450
2451      if (rtems_bdbuf_tracer)
2452        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2453                bd->block, transfer->write_req.bufnum,
2454                need_continuous_blocks ? "MULTI" : "SCAT");
2455
2456      if (need_continuous_blocks && transfer->write_req.bufnum &&
2457          bd->block != last_block + media_blocks_per_block)
2458      {
2459        rtems_chain_prepend_unprotected (&transfer->bds, &bd->link);
2460        write = true;
2461      }
2462      else
2463      {
2464        rtems_blkdev_sg_buffer* buf;
2465        buf = &transfer->write_req.bufs[transfer->write_req.bufnum];
2466        transfer->write_req.bufnum++;
2467        buf->user   = bd;
2468        buf->block  = bd->block;
2469        buf->length = dd->block_size;
2470        buf->buffer = bd->buffer;
2471        last_block  = bd->block;
2472      }
2473
2474      /*
2475       * Perform the transfer if there are no more buffers, or the transfer
2476       * size has reached the configured max. value.
2477       */
2478
2479      if (rtems_chain_is_empty (&transfer->bds) ||
2480          (transfer->write_req.bufnum >= bdbuf_config.max_write_blocks))
2481        write = true;
2482
2483      if (write)
2484      {
2485        rtems_bdbuf_execute_transfer_request (dd, &transfer->write_req, false);
2486
2487        transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
2488        transfer->write_req.bufnum = 0;
2489      }
2490    }
2491
2492    /*
2493     * If sync'ing and the deivce is capability of handling a sync IO control
2494     * call perform the call.
2495     */
2496    if (transfer->syncing &&
2497        (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
2498    {
2499      /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
2500      /* How should the error be handled ? */
2501    }
2502  }
2503}
2504
2505/**
2506 * Process the modified list of buffers. There is a sync or modified list that
2507 * needs to be handled so we have a common function to do the work.
2508 *
2509 * @param dd_ptr Pointer to the device to handle. If BDBUF_INVALID_DEV no
2510 * device is selected so select the device of the first buffer to be written to
2511 * disk.
2512 * @param chain The modified chain to process.
2513 * @param transfer The chain to append buffers to be written too.
2514 * @param sync_active If true this is a sync operation so expire all timers.
2515 * @param update_timers If true update the timers.
2516 * @param timer_delta It update_timers is true update the timers by this
2517 *                    amount.
2518 */
2519static void
2520rtems_bdbuf_swapout_modified_processing (rtems_disk_device  **dd_ptr,
2521                                         rtems_chain_control* chain,
2522                                         rtems_chain_control* transfer,
2523                                         bool                 sync_active,
2524                                         bool                 update_timers,
2525                                         uint32_t             timer_delta)
2526{
2527  if (!rtems_chain_is_empty (chain))
2528  {
2529    rtems_chain_node* node = rtems_chain_head (chain);
2530    bool              sync_all;
2531
2532    node = node->next;
2533
2534    /*
2535     * A sync active with no valid dev means sync all.
2536     */
2537    if (sync_active && (*dd_ptr == BDBUF_INVALID_DEV))
2538      sync_all = true;
2539    else
2540      sync_all = false;
2541
2542    while (!rtems_chain_is_tail (chain, node))
2543    {
2544      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2545
2546      /*
2547       * Check if the buffer's hold timer has reached 0. If a sync is active
2548       * or someone waits for a buffer written force all the timers to 0.
2549       *
2550       * @note Lots of sync requests will skew this timer. It should be based
2551       *       on TOD to be accurate. Does it matter ?
2552       */
2553      if (sync_all || (sync_active && (*dd_ptr == bd->dd))
2554          || rtems_bdbuf_has_buffer_waiters ())
2555        bd->hold_timer = 0;
2556
2557      if (bd->hold_timer)
2558      {
2559        if (update_timers)
2560        {
2561          if (bd->hold_timer > timer_delta)
2562            bd->hold_timer -= timer_delta;
2563          else
2564            bd->hold_timer = 0;
2565        }
2566
2567        if (bd->hold_timer)
2568        {
2569          node = node->next;
2570          continue;
2571        }
2572      }
2573
2574      /*
2575       * This assumes we can set it to BDBUF_INVALID_DEV which is just an
2576       * assumption. Cannot use the transfer list being empty the sync dev
2577       * calls sets the dev to use.
2578       */
2579      if (*dd_ptr == BDBUF_INVALID_DEV)
2580        *dd_ptr = bd->dd;
2581
2582      if (bd->dd == *dd_ptr)
2583      {
2584        rtems_chain_node* next_node = node->next;
2585        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2586
2587        /*
2588         * The blocks on the transfer list are sorted in block order. This
2589         * means multi-block transfers for drivers that require consecutive
2590         * blocks perform better with sorted blocks and for real disks it may
2591         * help lower head movement.
2592         */
2593
2594        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2595
2596        rtems_chain_extract_unprotected (node);
2597
2598        tnode = tnode->previous;
2599
2600        while (node && !rtems_chain_is_head (transfer, tnode))
2601        {
2602          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2603
2604          if (bd->block > tbd->block)
2605          {
2606            rtems_chain_insert_unprotected (tnode, node);
2607            node = NULL;
2608          }
2609          else
2610            tnode = tnode->previous;
2611        }
2612
2613        if (node)
2614          rtems_chain_prepend_unprotected (transfer, node);
2615
2616        node = next_node;
2617      }
2618      else
2619      {
2620        node = node->next;
2621      }
2622    }
2623  }
2624}
2625
2626/**
2627 * Process the cache's modified buffers. Check the sync list first then the
2628 * modified list extracting the buffers suitable to be written to disk. We have
2629 * a device at a time. The task level loop will repeat this operation while
2630 * there are buffers to be written. If the transfer fails place the buffers
2631 * back on the modified list and try again later. The cache is unlocked while
2632 * the buffers are being written to disk.
2633 *
2634 * @param timer_delta It update_timers is true update the timers by this
2635 *                    amount.
2636 * @param update_timers If true update the timers.
2637 * @param transfer The transfer transaction data.
2638 *
2639 * @retval true Buffers where written to disk so scan again.
2640 * @retval false No buffers where written to disk.
2641 */
2642static bool
2643rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2644                                bool                          update_timers,
2645                                rtems_bdbuf_swapout_transfer* transfer)
2646{
2647  rtems_bdbuf_swapout_worker* worker;
2648  bool                        transfered_buffers = false;
2649
2650  rtems_bdbuf_lock_cache ();
2651
2652  /*
2653   * If a sync is active do not use a worker because the current code does not
2654   * cleaning up after. We need to know the buffers have been written when
2655   * syncing to release sync lock and currently worker threads do not return to
2656   * here. We do not know the worker is the last in a sequence of sync writes
2657   * until after we have it running so we do not know to tell it to release the
2658   * lock. The simplest solution is to get the main swap out task perform all
2659   * sync operations.
2660   */
2661  if (bdbuf_cache.sync_active)
2662    worker = NULL;
2663  else
2664  {
2665    worker = (rtems_bdbuf_swapout_worker*)
2666      rtems_chain_get_unprotected (&bdbuf_cache.swapout_free_workers);
2667    if (worker)
2668      transfer = &worker->transfer;
2669  }
2670
2671  rtems_chain_initialize_empty (&transfer->bds);
2672  transfer->dd = BDBUF_INVALID_DEV;
2673  transfer->syncing = bdbuf_cache.sync_active;
2674
2675  /*
2676   * When the sync is for a device limit the sync to that device. If the sync
2677   * is for a buffer handle process the devices in the order on the sync
2678   * list. This means the dev is BDBUF_INVALID_DEV.
2679   */
2680  if (bdbuf_cache.sync_active)
2681    transfer->dd = bdbuf_cache.sync_device;
2682
2683  /*
2684   * If we have any buffers in the sync queue move them to the modified
2685   * list. The first sync buffer will select the device we use.
2686   */
2687  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2688                                           &bdbuf_cache.sync,
2689                                           &transfer->bds,
2690                                           true, false,
2691                                           timer_delta);
2692
2693  /*
2694   * Process the cache's modified list.
2695   */
2696  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2697                                           &bdbuf_cache.modified,
2698                                           &transfer->bds,
2699                                           bdbuf_cache.sync_active,
2700                                           update_timers,
2701                                           timer_delta);
2702
2703  /*
2704   * We have all the buffers that have been modified for this device so the
2705   * cache can be unlocked because the state of each buffer has been set to
2706   * TRANSFER.
2707   */
2708  rtems_bdbuf_unlock_cache ();
2709
2710  /*
2711   * If there are buffers to transfer to the media transfer them.
2712   */
2713  if (!rtems_chain_is_empty (&transfer->bds))
2714  {
2715    if (worker)
2716    {
2717      rtems_status_code sc = rtems_event_send (worker->id,
2718                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2719      if (sc != RTEMS_SUCCESSFUL)
2720        rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_2);
2721    }
2722    else
2723    {
2724      rtems_bdbuf_swapout_write (transfer);
2725    }
2726
2727    transfered_buffers = true;
2728  }
2729
2730  if (bdbuf_cache.sync_active && !transfered_buffers)
2731  {
2732    rtems_id sync_requester;
2733    rtems_bdbuf_lock_cache ();
2734    sync_requester = bdbuf_cache.sync_requester;
2735    bdbuf_cache.sync_active = false;
2736    bdbuf_cache.sync_requester = 0;
2737    rtems_bdbuf_unlock_cache ();
2738    if (sync_requester)
2739      rtems_event_transient_send (sync_requester);
2740  }
2741
2742  return transfered_buffers;
2743}
2744
2745/**
2746 * The swapout worker thread body.
2747 *
2748 * @param arg A pointer to the worker thread's private data.
2749 * @return rtems_task Not used.
2750 */
2751static rtems_task
2752rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2753{
2754  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2755
2756  while (worker->enabled)
2757  {
2758    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2759
2760    rtems_bdbuf_swapout_write (&worker->transfer);
2761
2762    rtems_bdbuf_lock_cache ();
2763
2764    rtems_chain_initialize_empty (&worker->transfer.bds);
2765    worker->transfer.dd = BDBUF_INVALID_DEV;
2766
2767    rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);
2768
2769    rtems_bdbuf_unlock_cache ();
2770  }
2771
2772  free (worker);
2773
2774  rtems_task_delete (RTEMS_SELF);
2775}
2776
2777/**
2778 * Close the swapout worker threads.
2779 */
2780static void
2781rtems_bdbuf_swapout_workers_close (void)
2782{
2783  rtems_chain_node* node;
2784
2785  rtems_bdbuf_lock_cache ();
2786
2787  node = rtems_chain_first (&bdbuf_cache.swapout_free_workers);
2788  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_free_workers, node))
2789  {
2790    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2791    worker->enabled = false;
2792    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2793    node = rtems_chain_next (node);
2794  }
2795
2796  rtems_bdbuf_unlock_cache ();
2797}
2798
2799/**
2800 * Body of task which takes care on flushing modified buffers to the disk.
2801 *
2802 * @param arg A pointer to the global cache data. Use the global variable and
2803 *            not this.
2804 * @return rtems_task Not used.
2805 */
2806static rtems_task
2807rtems_bdbuf_swapout_task (rtems_task_argument arg)
2808{
2809  rtems_bdbuf_swapout_transfer* transfer = (rtems_bdbuf_swapout_transfer *) arg;
2810  uint32_t                      period_in_ticks;
2811  const uint32_t                period_in_msecs = bdbuf_config.swapout_period;
2812  uint32_t                      timer_delta;
2813
2814  /*
2815   * Localise the period.
2816   */
2817  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2818
2819  /*
2820   * This is temporary. Needs to be changed to use the real time clock.
2821   */
2822  timer_delta = period_in_msecs;
2823
2824  while (bdbuf_cache.swapout_enabled)
2825  {
2826    rtems_event_set   out;
2827    rtems_status_code sc;
2828
2829    /*
2830     * Only update the timers once in the processing cycle.
2831     */
2832    bool update_timers = true;
2833
2834    /*
2835     * If we write buffers to any disk perform a check again. We only write a
2836     * single device at a time and the cache may have more than one device's
2837     * buffers modified waiting to be written.
2838     */
2839    bool transfered_buffers;
2840
2841    do
2842    {
2843      transfered_buffers = false;
2844
2845      /*
2846       * Extact all the buffers we find for a specific device. The device is
2847       * the first one we find on a modified list. Process the sync queue of
2848       * buffers first.
2849       */
2850      if (rtems_bdbuf_swapout_processing (timer_delta,
2851                                          update_timers,
2852                                          transfer))
2853      {
2854        transfered_buffers = true;
2855      }
2856
2857      /*
2858       * Only update the timers once.
2859       */
2860      update_timers = false;
2861    }
2862    while (transfered_buffers);
2863
2864    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2865                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2866                              period_in_ticks,
2867                              &out);
2868
2869    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2870      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SWAPOUT_RE);
2871  }
2872
2873  rtems_bdbuf_swapout_workers_close ();
2874
2875  free (transfer);
2876
2877  rtems_task_delete (RTEMS_SELF);
2878}
2879
2880static void
2881rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2882{
2883  bool wake_buffer_waiters = false;
2884  rtems_chain_node *node = NULL;
2885
2886  while ((node = rtems_chain_get_unprotected (purge_list)) != NULL)
2887  {
2888    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
2889
2890    if (bd->waiters == 0)
2891      wake_buffer_waiters = true;
2892
2893    rtems_bdbuf_discard_buffer (bd);
2894  }
2895
2896  if (wake_buffer_waiters)
2897    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2898}
2899
/**
 * Collect all buffers of the given device from the AVL tree onto the purge
 * list.  The tree is walked iteratively in pre-order (node, left, right)
 * using an explicit parent stack.  Buffers currently accessed or in
 * transfer cannot be removed here; they are moved to the corresponding
 * purged state so they get discarded when released or when the transfer
 * finishes.  Called with the cache locked.
 */
static void
rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
                              const rtems_disk_device *dd)
{
  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer **prev = stack;
  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;

  /* NULL sentinel marks the bottom of the parent stack. */
  *prev = NULL;

  while (cur != NULL)
  {
    if (cur->dd == dd)
    {
      switch (cur->state)
      {
        case RTEMS_BDBUF_STATE_FREE:
        case RTEMS_BDBUF_STATE_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
          /* Already free or already marked purged; nothing to do. */
          break;
        case RTEMS_BDBUF_STATE_SYNC:
          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
          /* Fall through */
        case RTEMS_BDBUF_STATE_MODIFIED:
          rtems_bdbuf_group_release (cur);
          /* Fall through */
        case RTEMS_BDBUF_STATE_CACHED:
          /* Idle buffer: move it from its current list to the purge list. */
          rtems_chain_extract_unprotected (&cur->link);
          rtems_chain_append_unprotected (purge_list, &cur->link);
          break;
        case RTEMS_BDBUF_STATE_TRANSFER:
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
          break;
        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
          break;
        default:
          rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_STATE_11);
      }
    }

    if (cur->avl.left != NULL)
    {
      /* Left */
      ++prev;
      *prev = cur;
      cur = cur->avl.left;
    }
    else if (cur->avl.right != NULL)
    {
      /* Right */
      ++prev;
      *prev = cur;
      cur = cur->avl.right;
    }
    else
    {
      /* Leaf: pop ancestors until one has an unvisited right subtree. */
      while (*prev != NULL
             && (cur == (*prev)->avl.right || (*prev)->avl.right == NULL))
      {
        /* Up */
        cur = *prev;
        --prev;
      }
      if (*prev != NULL)
        /* Right */
        cur = (*prev)->avl.right;
      else
        /* Finished */
        cur = NULL;
    }
  }
}
2976
/**
 * Purge all buffers of a disk device from the cache.  Any pending read
 * ahead for the device is reset first, then all its buffers are gathered
 * and discarded under the cache lock.
 *
 * @param dd The disk device to purge.
 */
void
rtems_bdbuf_purge_dev (rtems_disk_device *dd)
{
  rtems_chain_control purge_list;

  rtems_chain_initialize_empty (&purge_list);
  rtems_bdbuf_lock_cache ();
  rtems_bdbuf_read_ahead_reset (dd);
  rtems_bdbuf_gather_for_purge (&purge_list, dd);
  rtems_bdbuf_purge_list (&purge_list);
  rtems_bdbuf_unlock_cache ();
}
2989
2990rtems_status_code
2991rtems_bdbuf_set_block_size (rtems_disk_device *dd,
2992                            uint32_t           block_size,
2993                            bool               sync)
2994{
2995  rtems_status_code sc = RTEMS_SUCCESSFUL;
2996
2997  /*
2998   * We do not care about the synchronization status since we will purge the
2999   * device later.
3000   */
3001  if (sync)
3002    rtems_bdbuf_syncdev (dd);
3003
3004  rtems_bdbuf_lock_cache ();
3005
3006  if (block_size > 0)
3007  {
3008    size_t bds_per_group = rtems_bdbuf_bds_per_group (block_size);
3009
3010    if (bds_per_group != 0)
3011    {
3012      int block_to_media_block_shift = 0;
3013      uint32_t media_blocks_per_block = block_size / dd->media_block_size;
3014      uint32_t one = 1;
3015
3016      while ((one << block_to_media_block_shift) < media_blocks_per_block)
3017      {
3018        ++block_to_media_block_shift;
3019      }
3020
3021      if ((dd->media_block_size << block_to_media_block_shift) != block_size)
3022        block_to_media_block_shift = -1;
3023
3024      dd->block_size = block_size;
3025      dd->block_count = dd->size / media_blocks_per_block;
3026      dd->media_blocks_per_block = media_blocks_per_block;
3027      dd->block_to_media_block_shift = block_to_media_block_shift;
3028      dd->bds_per_group = bds_per_group;
3029
3030      rtems_bdbuf_purge_dev (dd);
3031    }
3032    else
3033    {
3034      sc = RTEMS_INVALID_NUMBER;
3035    }
3036  }
3037  else
3038  {
3039    sc = RTEMS_INVALID_NUMBER;
3040  }
3041
3042  rtems_bdbuf_unlock_cache ();
3043
3044  return sc;
3045}
3046
/**
 * Body of the read-ahead task.  Waits for wake-up events and drains the
 * chain of devices with pending read-ahead requests.  For each device the
 * next range of blocks is read; the trigger is re-armed half way into the
 * transferred range, or disarmed near the end of the device or when the
 * block translation fails.
 */
static rtems_task
rtems_bdbuf_read_ahead_task (rtems_task_argument arg)
{
  rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;

  while (bdbuf_cache.read_ahead_enabled)
  {
    rtems_chain_node *node;

    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
    rtems_bdbuf_lock_cache ();

    while ((node = rtems_chain_get_unprotected (chain)) != NULL)
    {
      /* Recover the device from its embedded read-ahead chain node. */
      rtems_disk_device *dd = (rtems_disk_device *)
        ((char *) node - offsetof (rtems_disk_device, read_ahead.node));
      rtems_blkdev_bnum block = dd->read_ahead.next;
      rtems_blkdev_bnum media_block = 0;
      rtems_status_code sc =
        rtems_bdbuf_get_media_block (dd, block, &media_block);

      /* Off-chain again: new triggers may queue this device once more. */
      rtems_chain_set_off_chain (&dd->read_ahead.node);

      if (sc == RTEMS_SUCCESSFUL)
      {
        rtems_bdbuf_buffer *bd =
          rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

        if (bd != NULL)
        {
          uint32_t transfer_count = dd->block_count - block;
          uint32_t max_transfer_count = bdbuf_config.max_read_ahead_blocks;

          if (transfer_count >= max_transfer_count)
          {
            /* Clamp the transfer and re-arm the trigger in the middle of
             * the range to keep sequential readers ahead of the disk. */
            transfer_count = max_transfer_count;
            dd->read_ahead.trigger = block + transfer_count / 2;
            dd->read_ahead.next = block + transfer_count;
          }
          else
          {
            /* End of device reached: stop triggering further read ahead. */
            dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
          }

          ++dd->stats.read_ahead_transfers;
          rtems_bdbuf_execute_read_request (dd, bd, transfer_count);
        }
      }
      else
      {
        dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
      }
    }

    rtems_bdbuf_unlock_cache ();
  }

  rtems_task_delete (RTEMS_SELF);
}
3106
/**
 * Copy the I/O statistics of a disk device into @a stats.  The cache lock
 * guarantees a consistent snapshot.
 */
void rtems_bdbuf_get_device_stats (const rtems_disk_device *dd,
                                   rtems_blkdev_stats      *stats)
{
  rtems_bdbuf_lock_cache ();
  *stats = dd->stats;
  rtems_bdbuf_unlock_cache ();
}
3114
/**
 * Reset the I/O statistics of a disk device to zero under the cache lock.
 */
void rtems_bdbuf_reset_device_stats (rtems_disk_device *dd)
{
  rtems_bdbuf_lock_cache ();
  memset (&dd->stats, 0, sizeof(dd->stats));
  rtems_bdbuf_unlock_cache ();
}
Note: See TracBrowser for help on using the repository browser.