source: rtems/cpukit/libblock/src/bdbuf.c @ 9f0a68c

4.115
Last change on this file since 9f0a68c was 9f0a68c, checked in by Sebastian Huber <sebastian.huber@…>, on 10/31/12 at 10:54:39

libblock: Block device transfer request API change

Add and use rtems_blkdev_request_done(). Block device transfer requests
must signal the completion status now with rtems_blkdev_request_done().
The return value of the block device IO control will be ignored for
transfer requests.

The first parameter of rtems_blkdev_request_cb is now the transfer
request structure.

Renamed rtems_blkdev_request::req_done to rtems_blkdev_request::done to
break third party drivers at compile time, otherwise this API change
would result in runtime errors.

  • Property mode set to 100644
File size: 83.9 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
 11 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (c) 2009-2012 embedded brains GmbH.
23 */
24
25/**
26 * Set to 1 to enable debug tracing.
27 */
28#define RTEMS_BDBUF_TRACE 0
29
30#if HAVE_CONFIG_H
31#include "config.h"
32#endif
33#include <limits.h>
34#include <errno.h>
35#include <stdio.h>
36#include <string.h>
37#include <inttypes.h>
38
39#include <rtems.h>
40#include <rtems/error.h>
41#include <rtems/malloc.h>
42
43#include "rtems/bdbuf.h"
44
45#define BDBUF_INVALID_DEV NULL
46
47/*
48 * Simpler label for this file.
49 */
50#define bdbuf_config rtems_bdbuf_configuration
51
52/**
53 * A swapout transfer transaction data. This data is passed to a worked thread
54 * to handle the write phase of the transfer.
55 */
56typedef struct rtems_bdbuf_swapout_transfer
57{
58  rtems_chain_control   bds;         /**< The transfer list of BDs. */
59  rtems_disk_device    *dd;          /**< The device the transfer is for. */
60  bool                  syncing;     /**< The data is a sync'ing. */
61  rtems_blkdev_request* write_req;   /**< The write request array. */
62} rtems_bdbuf_swapout_transfer;
63
64/**
65 * Swapout worker thread. These are available to take processing from the
66 * main swapout thread and handle the I/O operation.
67 */
68typedef struct rtems_bdbuf_swapout_worker
69{
70  rtems_chain_node             link;     /**< The threads sit on a chain when
71                                          * idle. */
72  rtems_id                     id;       /**< The id of the task so we can wake
73                                          * it. */
74  bool                         enabled;  /**< The worker is enabled. */
75  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
76                                          * thread. */
77} rtems_bdbuf_swapout_worker;
78
79/**
80 * Buffer waiters synchronization.
81 */
82typedef struct rtems_bdbuf_waiters {
83  unsigned count;
84  rtems_id sema;
85} rtems_bdbuf_waiters;
86
87/**
88 * The BD buffer cache.
89 */
90typedef struct rtems_bdbuf_cache
91{
92  rtems_id            swapout;           /**< Swapout task ID */
93  bool                swapout_enabled;   /**< Swapout is only running if
94                                          * enabled. Set to false to kill the
95                                          * swap out task. It deletes itself. */
96  rtems_chain_control swapout_workers;   /**< The work threads for the swapout
97                                          * task. */
98
99  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
100                                          * descriptors. */
101  void*               buffers;           /**< The buffer's memory. */
102  size_t              buffer_min_count;  /**< Number of minimum size buffers
103                                          * that fit the buffer memory. */
104  size_t              max_bds_per_group; /**< The number of BDs of minimum
105                                          * buffer size that fit in a group. */
106  uint32_t            flags;             /**< Configuration flags. */
107
108  rtems_id            lock;              /**< The cache lock. It locks all
109                                          * cache data, BD and lists. */
110  rtems_id            sync_lock;         /**< Sync calls block writes. */
111  bool                sync_active;       /**< True if a sync is active. */
112  rtems_id            sync_requester;    /**< The sync requester. */
113  rtems_disk_device  *sync_device;       /**< The device to sync and
114                                          * BDBUF_INVALID_DEV not a device
115                                          * sync. */
116
117  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
118                                          * root. There is only one. */
119  rtems_chain_control lru;               /**< Least recently used list */
120  rtems_chain_control modified;          /**< Modified buffers list */
121  rtems_chain_control sync;              /**< Buffers to sync list */
122
123  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
124                                          * ACCESS_CACHED, ACCESS_MODIFIED or
125                                          * ACCESS_EMPTY
126                                          * state. */
127  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
128                                          * state. */
129  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
130                                          * available. */
131
132  size_t              group_count;       /**< The number of groups. */
133  rtems_bdbuf_group*  groups;            /**< The groups. */
134  rtems_id            read_ahead_task;   /**< Read-ahead task */
135  rtems_chain_control read_ahead_chain;  /**< Read-ahead request chain */
136  bool                read_ahead_enabled; /**< Read-ahead enabled */
137
138  bool                initialised;       /**< Initialised state. */
139} rtems_bdbuf_cache;
140
141/**
142 * Fatal errors
143 */
144#define RTEMS_BLKDEV_FATAL_ERROR(n) \
145  (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF))
146
147#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_11      RTEMS_BLKDEV_FATAL_ERROR(1)
148#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_4       RTEMS_BLKDEV_FATAL_ERROR(2)
149#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_5       RTEMS_BLKDEV_FATAL_ERROR(3)
150#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_6       RTEMS_BLKDEV_FATAL_ERROR(4)
151#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_7       RTEMS_BLKDEV_FATAL_ERROR(5)
152#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_8       RTEMS_BLKDEV_FATAL_ERROR(6)
153#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_9       RTEMS_BLKDEV_FATAL_ERROR(7)
154#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_10      RTEMS_BLKDEV_FATAL_ERROR(8)
155#define RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM       RTEMS_BLKDEV_FATAL_ERROR(9)
156#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT       RTEMS_BLKDEV_FATAL_ERROR(10)
157
158/*
159 * The lock/unlock fatal errors occur in case the bdbuf is not initialized with
160 * rtems_bdbuf_init().  General system corruption like stack overflow etc. may
161 * also trigger these fatal errors.
162 */
163#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK     RTEMS_BLKDEV_FATAL_ERROR(11)
164#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK   RTEMS_BLKDEV_FATAL_ERROR(12)
165#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK    RTEMS_BLKDEV_FATAL_ERROR(13)
166#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK  RTEMS_BLKDEV_FATAL_ERROR(14)
167
168#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS   RTEMS_BLKDEV_FATAL_ERROR(15)
169#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2  RTEMS_BLKDEV_FATAL_ERROR(16)
170#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST   RTEMS_BLKDEV_FATAL_ERROR(17)
171#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR(18)
172#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE    RTEMS_BLKDEV_FATAL_ERROR(19)
173#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE       RTEMS_BLKDEV_FATAL_ERROR(20)
174#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM      RTEMS_BLKDEV_FATAL_ERROR(21)
175#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE  RTEMS_BLKDEV_FATAL_ERROR(22)
176#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE          RTEMS_BLKDEV_FATAL_ERROR(24)
177#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS          RTEMS_BLKDEV_FATAL_ERROR(25)
178#define RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT     RTEMS_BLKDEV_FATAL_ERROR(26)
179#define RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE       RTEMS_BLKDEV_FATAL_ERROR(27)
180#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_0       RTEMS_BLKDEV_FATAL_ERROR(28)
181#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_1       RTEMS_BLKDEV_FATAL_ERROR(29)
182#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_2       RTEMS_BLKDEV_FATAL_ERROR(30)
183#define RTEMS_BLKDEV_FATAL_BDBUF_RA_WAKE_UP    RTEMS_BLKDEV_FATAL_ERROR(31)
184#define RTEMS_BLKDEV_FATAL_BDBUF_WAIT_TRANS_EVNT RTEMS_BLKDEV_FATAL_ERROR(32)
185
186/**
187 * The events used in this code. These should be system events rather than
188 * application events.
189 */
190#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
191#define RTEMS_BDBUF_READ_AHEAD_WAKE_UP RTEMS_EVENT_1
192
193/**
194 * Lock semaphore attributes. This is used for locking type mutexes.
195 *
196 * @warning Priority inheritance is on.
197 */
198#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
199  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
200   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
201
202/**
203 * Waiter semaphore attributes.
204 *
205 * @warning Do not configure as inherit priority. If a driver is in the driver
206 *          initialisation table this locked semaphore will have the IDLE task
207 *          as the holder and a blocking task will raise the priority of the
 208 *          IDLE task which can cause unusual side effects.
209 */
210#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
211  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
212   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
213
214/**
215 * Waiter timeout. Set to non-zero to find some info on a waiter that is
216 * waiting too long.
217 */
218#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
219#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
220#define RTEMS_BDBUF_WAIT_TIMEOUT \
221  (TOD_MICROSECONDS_TO_TICKS (20000000))
222#endif
223
224static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
225
226static rtems_task rtems_bdbuf_read_ahead_task(rtems_task_argument arg);
227
228/**
229 * The Buffer Descriptor cache.
230 */
231static rtems_bdbuf_cache bdbuf_cache;
232
233#if RTEMS_BDBUF_TRACE
234/**
235 * If true output the trace message.
236 */
237bool rtems_bdbuf_tracer;
238
239/**
240 * Return the number of items on the list.
241 *
242 * @param list The chain control.
243 * @return uint32_t The number of items on the list.
244 */
245uint32_t
246rtems_bdbuf_list_count (rtems_chain_control* list)
247{
248  rtems_chain_node* node = rtems_chain_first (list);
249  uint32_t          count = 0;
250  while (!rtems_chain_is_tail (list, node))
251  {
252    count++;
253    node = rtems_chain_next (node);
254  }
255  return count;
256}
257
258/**
259 * Show the usage for the bdbuf cache.
260 */
261void
262rtems_bdbuf_show_usage (void)
263{
264  uint32_t group;
265  uint32_t total = 0;
266  uint32_t val;
267
268  for (group = 0; group < bdbuf_cache.group_count; group++)
269    total += bdbuf_cache.groups[group].users;
270  printf ("bdbuf:group users=%lu", total);
271  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
272  printf (", lru=%lu", val);
273  total = val;
274  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
275  printf (", mod=%lu", val);
276  total += val;
277  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
278  printf (", sync=%lu", val);
279  total += val;
280  printf (", total=%lu\n", total);
281}
282
283/**
284 * Show the users for a group of a bd.
285 *
286 * @param where A label to show the context of output.
287 * @param bd The bd to show the users of.
288 */
289void
290rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
291{
292  const char* states[] =
293    { "FR", "EM", "CH", "AC", "AM", "AE", "AP", "MD", "SY", "TR", "TP" };
294
295  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
296          where,
297          bd->block, states[bd->state],
298          bd->group - bdbuf_cache.groups,
299          bd - bdbuf_cache.bds,
300          bd->group->users,
301          bd->group->users > 8 ? "<<<<<<<" : "");
302}
303#else
304#define rtems_bdbuf_tracer (0)
305#define rtems_bdbuf_show_usage() ((void) 0)
306#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
307#endif
308
309/**
310 * The default maximum height of 32 allows for AVL trees having between
311 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
312 * change this compile-time constant as you wish.
313 */
314#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
315#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
316#endif
317
318static void
319rtems_bdbuf_fatal (rtems_bdbuf_buf_state state, uint32_t error)
320{
321  rtems_fatal_error_occurred ((((uint32_t) state) << 16) | error);
322}
323
324/**
325 * Searches for the node with specified dd/block.
326 *
327 * @param root pointer to the root node of the AVL-Tree
328 * @param dd disk device search key
329 * @param block block search key
330 * @retval NULL node with the specified dd/block is not found
331 * @return pointer to the node with specified dd/block
332 */
333static rtems_bdbuf_buffer *
334rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
335                        const rtems_disk_device *dd,
336                        rtems_blkdev_bnum    block)
337{
338  rtems_bdbuf_buffer* p = *root;
339
340  while ((p != NULL) && ((p->dd != dd) || (p->block != block)))
341  {
342    if (((uintptr_t) p->dd < (uintptr_t) dd)
343        || ((p->dd == dd) && (p->block < block)))
344    {
345      p = p->avl.right;
346    }
347    else
348    {
349      p = p->avl.left;
350    }
351  }
352
353  return p;
354}
355
356/**
357 * Inserts the specified node to the AVl-Tree.
358 *
359 * @param root pointer to the root node of the AVL-Tree
360 * @param node Pointer to the node to add.
361 * @retval 0 The node added successfully
362 * @retval -1 An error occured
363 */
364static int
365rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
366                       rtems_bdbuf_buffer*  node)
367{
368  const rtems_disk_device *dd = node->dd;
369  rtems_blkdev_bnum block = node->block;
370
371  rtems_bdbuf_buffer*  p = *root;
372  rtems_bdbuf_buffer*  q;
373  rtems_bdbuf_buffer*  p1;
374  rtems_bdbuf_buffer*  p2;
375  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
376  rtems_bdbuf_buffer** buf_prev = buf_stack;
377
378  bool modified = false;
379
380  if (p == NULL)
381  {
382    *root = node;
383    node->avl.left = NULL;
384    node->avl.right = NULL;
385    node->avl.bal = 0;
386    return 0;
387  }
388
389  while (p != NULL)
390  {
391    *buf_prev++ = p;
392
393    if (((uintptr_t) p->dd < (uintptr_t) dd)
394        || ((p->dd == dd) && (p->block < block)))
395    {
396      p->avl.cache = 1;
397      q = p->avl.right;
398      if (q == NULL)
399      {
400        q = node;
401        p->avl.right = q = node;
402        break;
403      }
404    }
405    else if ((p->dd != dd) || (p->block != block))
406    {
407      p->avl.cache = -1;
408      q = p->avl.left;
409      if (q == NULL)
410      {
411        q = node;
412        p->avl.left = q;
413        break;
414      }
415    }
416    else
417    {
418      return -1;
419    }
420
421    p = q;
422  }
423
424  q->avl.left = q->avl.right = NULL;
425  q->avl.bal = 0;
426  modified = true;
427  buf_prev--;
428
429  while (modified)
430  {
431    if (p->avl.cache == -1)
432    {
433      switch (p->avl.bal)
434      {
435        case 1:
436          p->avl.bal = 0;
437          modified = false;
438          break;
439
440        case 0:
441          p->avl.bal = -1;
442          break;
443
444        case -1:
445          p1 = p->avl.left;
446          if (p1->avl.bal == -1) /* simple LL-turn */
447          {
448            p->avl.left = p1->avl.right;
449            p1->avl.right = p;
450            p->avl.bal = 0;
451            p = p1;
452          }
453          else /* double LR-turn */
454          {
455            p2 = p1->avl.right;
456            p1->avl.right = p2->avl.left;
457            p2->avl.left = p1;
458            p->avl.left = p2->avl.right;
459            p2->avl.right = p;
460            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
461            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
462            p = p2;
463          }
464          p->avl.bal = 0;
465          modified = false;
466          break;
467
468        default:
469          break;
470      }
471    }
472    else
473    {
474      switch (p->avl.bal)
475      {
476        case -1:
477          p->avl.bal = 0;
478          modified = false;
479          break;
480
481        case 0:
482          p->avl.bal = 1;
483          break;
484
485        case 1:
486          p1 = p->avl.right;
487          if (p1->avl.bal == 1) /* simple RR-turn */
488          {
489            p->avl.right = p1->avl.left;
490            p1->avl.left = p;
491            p->avl.bal = 0;
492            p = p1;
493          }
494          else /* double RL-turn */
495          {
496            p2 = p1->avl.left;
497            p1->avl.left = p2->avl.right;
498            p2->avl.right = p1;
499            p->avl.right = p2->avl.left;
500            p2->avl.left = p;
501            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
502            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
503            p = p2;
504          }
505          p->avl.bal = 0;
506          modified = false;
507          break;
508
509        default:
510          break;
511      }
512    }
513    q = p;
514    if (buf_prev > buf_stack)
515    {
516      p = *--buf_prev;
517
518      if (p->avl.cache == -1)
519      {
520        p->avl.left = q;
521      }
522      else
523      {
524        p->avl.right = q;
525      }
526    }
527    else
528    {
529      *root = p;
530      break;
531    }
532  };
533
534  return 0;
535}
536
537
538/**
539 * Removes the node from the tree.
540 *
541 * @param root Pointer to pointer to the root node
542 * @param node Pointer to the node to remove
543 * @retval 0 Item removed
544 * @retval -1 No such item found
545 */
546static int
547rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
548                       const rtems_bdbuf_buffer* node)
549{
550  const rtems_disk_device *dd = node->dd;
551  rtems_blkdev_bnum block = node->block;
552
553  rtems_bdbuf_buffer*  p = *root;
554  rtems_bdbuf_buffer*  q;
555  rtems_bdbuf_buffer*  r;
556  rtems_bdbuf_buffer*  s;
557  rtems_bdbuf_buffer*  p1;
558  rtems_bdbuf_buffer*  p2;
559  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
560  rtems_bdbuf_buffer** buf_prev = buf_stack;
561
562  bool modified = false;
563
564  memset (buf_stack, 0, sizeof(buf_stack));
565
566  while (p != NULL)
567  {
568    *buf_prev++ = p;
569
570    if (((uintptr_t) p->dd < (uintptr_t) dd)
571        || ((p->dd == dd) && (p->block < block)))
572    {
573      p->avl.cache = 1;
574      p = p->avl.right;
575    }
576    else if ((p->dd != dd) || (p->block != block))
577    {
578      p->avl.cache = -1;
579      p = p->avl.left;
580    }
581    else
582    {
583      /* node found */
584      break;
585    }
586  }
587
588  if (p == NULL)
589  {
590    /* there is no such node */
591    return -1;
592  }
593
594  q = p;
595
596  buf_prev--;
597  if (buf_prev > buf_stack)
598  {
599    p = *(buf_prev - 1);
600  }
601  else
602  {
603    p = NULL;
604  }
605
606  /* at this moment q - is a node to delete, p is q's parent */
607  if (q->avl.right == NULL)
608  {
609    r = q->avl.left;
610    if (r != NULL)
611    {
612      r->avl.bal = 0;
613    }
614    q = r;
615  }
616  else
617  {
618    rtems_bdbuf_buffer **t;
619
620    r = q->avl.right;
621
622    if (r->avl.left == NULL)
623    {
624      r->avl.left = q->avl.left;
625      r->avl.bal = q->avl.bal;
626      r->avl.cache = 1;
627      *buf_prev++ = q = r;
628    }
629    else
630    {
631      t = buf_prev++;
632      s = r;
633
634      while (s->avl.left != NULL)
635      {
636        *buf_prev++ = r = s;
637        s = r->avl.left;
638        r->avl.cache = -1;
639      }
640
641      s->avl.left = q->avl.left;
642      r->avl.left = s->avl.right;
643      s->avl.right = q->avl.right;
644      s->avl.bal = q->avl.bal;
645      s->avl.cache = 1;
646
647      *t = q = s;
648    }
649  }
650
651  if (p != NULL)
652  {
653    if (p->avl.cache == -1)
654    {
655      p->avl.left = q;
656    }
657    else
658    {
659      p->avl.right = q;
660    }
661  }
662  else
663  {
664    *root = q;
665  }
666
667  modified = true;
668
669  while (modified)
670  {
671    if (buf_prev > buf_stack)
672    {
673      p = *--buf_prev;
674    }
675    else
676    {
677      break;
678    }
679
680    if (p->avl.cache == -1)
681    {
682      /* rebalance left branch */
683      switch (p->avl.bal)
684      {
685        case -1:
686          p->avl.bal = 0;
687          break;
688        case  0:
689          p->avl.bal = 1;
690          modified = false;
691          break;
692
693        case +1:
694          p1 = p->avl.right;
695
696          if (p1->avl.bal >= 0) /* simple RR-turn */
697          {
698            p->avl.right = p1->avl.left;
699            p1->avl.left = p;
700
701            if (p1->avl.bal == 0)
702            {
703              p1->avl.bal = -1;
704              modified = false;
705            }
706            else
707            {
708              p->avl.bal = 0;
709              p1->avl.bal = 0;
710            }
711            p = p1;
712          }
713          else /* double RL-turn */
714          {
715            p2 = p1->avl.left;
716
717            p1->avl.left = p2->avl.right;
718            p2->avl.right = p1;
719            p->avl.right = p2->avl.left;
720            p2->avl.left = p;
721
722            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
723            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;
724
725            p = p2;
726            p2->avl.bal = 0;
727          }
728          break;
729
730        default:
731          break;
732      }
733    }
734    else
735    {
736      /* rebalance right branch */
737      switch (p->avl.bal)
738      {
739        case +1:
740          p->avl.bal = 0;
741          break;
742
743        case  0:
744          p->avl.bal = -1;
745          modified = false;
746          break;
747
748        case -1:
749          p1 = p->avl.left;
750
751          if (p1->avl.bal <= 0) /* simple LL-turn */
752          {
753            p->avl.left = p1->avl.right;
754            p1->avl.right = p;
755            if (p1->avl.bal == 0)
756            {
757              p1->avl.bal = 1;
758              modified = false;
759            }
760            else
761            {
762              p->avl.bal = 0;
763              p1->avl.bal = 0;
764            }
765            p = p1;
766          }
767          else /* double LR-turn */
768          {
769            p2 = p1->avl.right;
770
771            p1->avl.right = p2->avl.left;
772            p2->avl.left = p1;
773            p->avl.left = p2->avl.right;
774            p2->avl.right = p;
775
776            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
777            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
778
779            p = p2;
780            p2->avl.bal = 0;
781          }
782          break;
783
784        default:
785          break;
786      }
787    }
788
789    if (buf_prev > buf_stack)
790    {
791      q = *(buf_prev - 1);
792
793      if (q->avl.cache == -1)
794      {
795        q->avl.left = p;
796      }
797      else
798      {
799        q->avl.right = p;
800      }
801    }
802    else
803    {
804      *root = p;
805      break;
806    }
807
808  }
809
810  return 0;
811}
812
813static void
814rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
815{
816  bd->state = state;
817}
818
819static rtems_blkdev_bnum
820rtems_bdbuf_media_block (const rtems_disk_device *dd, rtems_blkdev_bnum block)
821{
822  if (dd->block_to_media_block_shift >= 0)
823    return block << dd->block_to_media_block_shift;
824  else
825    /*
826     * Change the block number for the block size to the block number for the media
827     * block size. We have to use 64bit maths. There is no short cut here.
828     */
829    return (rtems_blkdev_bnum)
830      ((((uint64_t) block) * dd->block_size) / dd->media_block_size);
831}
832
833/**
834 * Lock the mutex. A single task can nest calls.
835 *
836 * @param lock The mutex to lock.
837 * @param fatal_error_code The error code if the call fails.
838 */
839static void
840rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
841{
842  rtems_status_code sc = rtems_semaphore_obtain (lock,
843                                                 RTEMS_WAIT,
844                                                 RTEMS_NO_TIMEOUT);
845  if (sc != RTEMS_SUCCESSFUL)
846    rtems_fatal_error_occurred (fatal_error_code);
847}
848
849/**
850 * Unlock the mutex.
851 *
852 * @param lock The mutex to unlock.
853 * @param fatal_error_code The error code if the call fails.
854 */
855static void
856rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
857{
858  rtems_status_code sc = rtems_semaphore_release (lock);
859  if (sc != RTEMS_SUCCESSFUL)
860    rtems_fatal_error_occurred (fatal_error_code);
861}
862
863/**
864 * Lock the cache. A single task can nest calls.
865 */
866static void
867rtems_bdbuf_lock_cache (void)
868{
869  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK);
870}
871
872/**
873 * Unlock the cache.
874 */
875static void
876rtems_bdbuf_unlock_cache (void)
877{
878  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK);
879}
880
881/**
882 * Lock the cache's sync. A single task can nest calls.
883 */
884static void
885rtems_bdbuf_lock_sync (void)
886{
887  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
888}
889
890/**
891 * Unlock the cache's sync lock. Any blocked writers are woken.
892 */
893static void
894rtems_bdbuf_unlock_sync (void)
895{
896  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
897                      RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
898}
899
900static void
901rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
902{
903  ++bd->group->users;
904}
905
906static void
907rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
908{
909  --bd->group->users;
910}
911
912static rtems_mode
913rtems_bdbuf_disable_preemption (void)
914{
915  rtems_status_code sc = RTEMS_SUCCESSFUL;
916  rtems_mode prev_mode = 0;
917
918  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
919  if (sc != RTEMS_SUCCESSFUL)
920    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS);
921
922  return prev_mode;
923}
924
925static void
926rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
927{
928  rtems_status_code sc = RTEMS_SUCCESSFUL;
929
930  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
931  if (sc != RTEMS_SUCCESSFUL)
932    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST);
933}
934
935/**
936 * Wait until woken. Semaphores are used so a number of tasks can wait and can
937 * be woken at once. Task events would require we maintain a list of tasks to
938 * be woken and this would require storage and we do not know the number of
939 * tasks that could be waiting.
940 *
941 * While we have the cache locked we can try and claim the semaphore and
942 * therefore know when we release the lock to the cache we will block until the
943 * semaphore is released. This may even happen before we get to block.
944 *
945 * A counter is used to save the release call when no one is waiting.
946 *
947 * The function assumes the cache is locked on entry and it will be locked on
948 * exit.
949 */
950static void
951rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
952{
953  rtems_status_code sc;
954  rtems_mode        prev_mode;
955
956  /*
957   * Indicate we are waiting.
958   */
959  ++waiters->count;
960
961  /*
962   * Disable preemption then unlock the cache and block.  There is no POSIX
963   * condition variable in the core API so this is a work around.
964   *
965   * The issue is a task could preempt after the cache is unlocked because it is
966   * blocking or just hits that window, and before this task has blocked on the
967   * semaphore. If the preempting task flushes the queue this task will not see
968   * the flush and may block for ever or until another transaction flushes this
969   * semaphore.
970   */
971  prev_mode = rtems_bdbuf_disable_preemption ();
972
973  /*
974   * Unlock the cache, wait, and lock the cache when we return.
975   */
976  rtems_bdbuf_unlock_cache ();
977
978  sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);
979
980  if (sc == RTEMS_TIMEOUT)
981    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO);
982
983  if (sc != RTEMS_UNSATISFIED)
984    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2);
985
986  rtems_bdbuf_lock_cache ();
987
988  rtems_bdbuf_restore_preemption (prev_mode);
989
990  --waiters->count;
991}
992
993static void
994rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
995{
996  rtems_bdbuf_group_obtain (bd);
997  ++bd->waiters;
998  rtems_bdbuf_anonymous_wait (waiters);
999  --bd->waiters;
1000  rtems_bdbuf_group_release (bd);
1001}
1002
1003/**
1004 * Wake a blocked resource. The resource has a counter that lets us know if
1005 * there are any waiters.
1006 */
1007static void
1008rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters)
1009{
1010  rtems_status_code sc = RTEMS_SUCCESSFUL;
1011
1012  if (waiters->count > 0)
1013  {
1014    sc = rtems_semaphore_flush (waiters->sema);
1015    if (sc != RTEMS_SUCCESSFUL)
1016      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE);
1017  }
1018}
1019
1020static void
1021rtems_bdbuf_wake_swapper (void)
1022{
1023  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1024                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1025  if (sc != RTEMS_SUCCESSFUL)
1026    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
1027}
1028
1029static bool
1030rtems_bdbuf_has_buffer_waiters (void)
1031{
1032  return bdbuf_cache.buffer_waiters.count;
1033}
1034
1035static void
1036rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
1037{
1038  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1039    rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM);
1040}
1041
1042static void
1043rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
1044{
1045  switch (bd->state)
1046  {
1047    case RTEMS_BDBUF_STATE_FREE:
1048      break;
1049    case RTEMS_BDBUF_STATE_CACHED:
1050      rtems_bdbuf_remove_from_tree (bd);
1051      break;
1052    default:
1053      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_10);
1054  }
1055
1056  rtems_chain_extract_unprotected (&bd->link);
1057}
1058
1059static void
1060rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
1061{
1062  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
1063  rtems_chain_prepend_unprotected (&bdbuf_cache.lru, &bd->link);
1064}
1065
/**
 * Set the buffer state to empty (it holds no valid data).
 */
static void
rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
}
1071
/**
 * Mark the buffer cached (it holds valid data) and append it to the tail of
 * the LRU list, making it the last candidate for recycling.
 */
static void
rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
  rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
}
1078
1079static void
1080rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
1081{
1082  rtems_bdbuf_make_empty (bd);
1083
1084  if (bd->waiters == 0)
1085  {
1086    rtems_bdbuf_remove_from_tree (bd);
1087    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1088  }
1089}
1090
/**
 * Release a buffer that was accessed onto the modified list.
 *
 * If a sync is active for this buffer's device, the cache lock is
 * temporarily dropped and we block on the sync lock so the active sync can
 * finish before the buffer is queued as modified.
 */
static void
rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
{
  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dd)
  {
    rtems_bdbuf_unlock_cache ();

    /*
     * Wait for the sync lock.
     */
    rtems_bdbuf_lock_sync ();

    rtems_bdbuf_unlock_sync ();
    rtems_bdbuf_lock_cache ();
  }

  /*
   * Only the first modified release sets the timer and any further user
   * accesses do not change the timer value which should move down. This
   * assumes the user's hold of the buffer is much less than the time on the
   * modified list. Resetting the timer on each access could result in a
   * buffer never getting to 0 and never being forced onto disk. This raises a
   * difficult question. Is a snapshot of a block that is changing better than
   * nothing being written? We have tended to think we should hold changes for
   * only a specific period of time even if still changing and get onto disk
   * and letting the file system try and recover this position if it can.
   */
  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
    bd->hold_timer = bdbuf_config.swap_block_hold;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
  rtems_chain_append_unprotected (&bdbuf_cache.modified, &bd->link);

  /* Wake any tasks waiting to access this buffer; otherwise, if tasks wait
     for free buffers, let the swapout task produce some. */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else if (rtems_bdbuf_has_buffer_waiters ())
    rtems_bdbuf_wake_swapper ();
}
1130
1131static void
1132rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
1133{
1134  rtems_bdbuf_group_release (bd);
1135  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
1136
1137  if (bd->waiters)
1138    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1139  else
1140    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1141}
1142
1143/**
1144 * Compute the number of BDs per group for a given buffer size.
1145 *
1146 * @param size The buffer size. It can be any size and we scale up.
1147 */
1148static size_t
1149rtems_bdbuf_bds_per_group (size_t size)
1150{
1151  size_t bufs_per_size;
1152  size_t bds_per_size;
1153
1154  if (size > bdbuf_config.buffer_max)
1155    return 0;
1156
1157  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1158
1159  for (bds_per_size = 1;
1160       bds_per_size < bufs_per_size;
1161       bds_per_size <<= 1)
1162    ;
1163
1164  return bdbuf_cache.max_bds_per_group / bds_per_size;
1165}
1166
1167static void
1168rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
1169{
1170  rtems_bdbuf_group_release (bd);
1171  rtems_bdbuf_discard_buffer (bd);
1172
1173  if (bd->waiters)
1174    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1175  else
1176    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1177}
1178
1179/**
1180 * Reallocate a group. The BDs currently allocated in the group are removed
1181 * from the ALV tree and any lists then the new BD's are prepended to the ready
1182 * list of the cache.
1183 *
1184 * @param group The group to reallocate.
1185 * @param new_bds_per_group The new count of BDs per group.
1186 * @return A buffer of this group.
1187 */
1188static rtems_bdbuf_buffer *
1189rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1190{
1191  rtems_bdbuf_buffer* bd;
1192  size_t              b;
1193  size_t              bufs_per_bd;
1194
1195  if (rtems_bdbuf_tracer)
1196    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1197            group - bdbuf_cache.groups, group->bds_per_group,
1198            new_bds_per_group);
1199
1200  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1201
1202  for (b = 0, bd = group->bdbuf;
1203       b < group->bds_per_group;
1204       b++, bd += bufs_per_bd)
1205    rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1206
1207  group->bds_per_group = new_bds_per_group;
1208  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1209
1210  for (b = 1, bd = group->bdbuf + bufs_per_bd;
1211       b < group->bds_per_group;
1212       b++, bd += bufs_per_bd)
1213    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1214
1215  if (b > 1)
1216    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1217
1218  return group->bdbuf;
1219}
1220
1221static void
1222rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
1223                                rtems_disk_device  *dd,
1224                                rtems_blkdev_bnum   block)
1225{
1226  bd->dd        = dd ;
1227  bd->block     = block;
1228  bd->avl.left  = NULL;
1229  bd->avl.right = NULL;
1230  bd->waiters   = 0;
1231
1232  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1233    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE);
1234
1235  rtems_bdbuf_make_empty (bd);
1236}
1237
1238static rtems_bdbuf_buffer *
1239rtems_bdbuf_get_buffer_from_lru_list (rtems_disk_device *dd,
1240                                      rtems_blkdev_bnum  block)
1241{
1242  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);
1243
1244  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
1245  {
1246    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
1247    rtems_bdbuf_buffer *empty_bd = NULL;
1248
1249    if (rtems_bdbuf_tracer)
1250      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
1251              bd - bdbuf_cache.bds,
1252              bd->group - bdbuf_cache.groups, bd->group->users,
1253              bd->group->bds_per_group, dd->bds_per_group);
1254
1255    /*
1256     * If nobody waits for this BD, we may recycle it.
1257     */
1258    if (bd->waiters == 0)
1259    {
1260      if (bd->group->bds_per_group == dd->bds_per_group)
1261      {
1262        rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1263
1264        empty_bd = bd;
1265      }
1266      else if (bd->group->users == 0)
1267        empty_bd = rtems_bdbuf_group_realloc (bd->group, dd->bds_per_group);
1268    }
1269
1270    if (empty_bd != NULL)
1271    {
1272      rtems_bdbuf_setup_empty_buffer (empty_bd, dd, block);
1273
1274      return empty_bd;
1275    }
1276
1277    node = rtems_chain_next (node);
1278  }
1279
1280  return NULL;
1281}
1282
1283static rtems_status_code
1284rtems_bdbuf_create_task(
1285  rtems_name name,
1286  rtems_task_priority priority,
1287  rtems_task_priority default_priority,
1288  rtems_task_entry entry,
1289  rtems_task_argument arg,
1290  rtems_id *id
1291)
1292{
1293  rtems_status_code sc;
1294  size_t stack_size = bdbuf_config.task_stack_size ?
1295    bdbuf_config.task_stack_size : RTEMS_BDBUF_TASK_STACK_SIZE_DEFAULT;
1296
1297  priority = priority != 0 ? priority : default_priority;
1298
1299  sc = rtems_task_create (name,
1300                          priority,
1301                          stack_size,
1302                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1303                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1304                          id);
1305
1306  if (sc == RTEMS_SUCCESSFUL)
1307    sc = rtems_task_start (*id, entry, arg);
1308
1309  return sc;
1310}
1311
/**
 * Initialise the cache.
 *
 * Creates the cache and sync locks, the three waiter semaphores, the buffer
 * descriptors, groups and cache-aligned buffer memory, then starts the
 * swapout task and, if configured, the read-ahead task.  On any failure all
 * resources created so far are rolled back.
 *
 * @return rtems_status_code The initialisation status.
 */
rtems_status_code
rtems_bdbuf_init (void)
{
  rtems_bdbuf_group*  group;
  rtems_bdbuf_buffer* bd;
  uint8_t*            buffer;
  size_t              b;
  size_t              cache_aligment;
  rtems_status_code   sc;
  rtems_mode          prev_mode;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:init\n");

  if (rtems_interrupt_is_in_progress())
    return RTEMS_CALLED_FROM_ISR;

  /*
   * Check the configuration table values.
   */
  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
    return RTEMS_INVALID_NUMBER;

  /*
   * We use a special variable to manage the initialisation in case we have
   * competing threads doing this. You may get errors if another thread
   * makes a call and we have not finished initialisation.
   */
  prev_mode = rtems_bdbuf_disable_preemption ();
  if (bdbuf_cache.initialised)
  {
    rtems_bdbuf_restore_preemption (prev_mode);
    return RTEMS_RESOURCE_IN_USE;
  }

  memset(&bdbuf_cache, 0, sizeof(bdbuf_cache));
  bdbuf_cache.initialised = true;
  rtems_bdbuf_restore_preemption (prev_mode);

  /*
   * For unspecified cache alignments we use the CPU alignment.
   */
  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
  if (cache_aligment <= 0)
    cache_aligment = CPU_ALIGNMENT;

  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;

  rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
  rtems_chain_initialize_empty (&bdbuf_cache.lru);
  rtems_chain_initialize_empty (&bdbuf_cache.modified);
  rtems_chain_initialize_empty (&bdbuf_cache.sync);
  rtems_chain_initialize_empty (&bdbuf_cache.read_ahead_chain);

  /*
   * Create the locks for the cache.
   */
  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
                               &bdbuf_cache.lock);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  rtems_bdbuf_lock_cache ();

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
                               &bdbuf_cache.sync_lock);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  /*
   * The waiter semaphores are created with a count of 0 so tasks block on
   * them; they are released via rtems_semaphore_flush() in
   * rtems_bdbuf_wake().
   */
  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.access_waiters.sema);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.transfer_waiters.sema);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'b'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.buffer_waiters.sema);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  /*
   * Compute the various number of elements in the cache.
   */
  bdbuf_cache.buffer_min_count =
    bdbuf_config.size / bdbuf_config.buffer_min;
  bdbuf_cache.max_bds_per_group =
    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
  bdbuf_cache.group_count =
    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;

  /*
   * Allocate the memory for the buffer descriptors.
   */
  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
                            bdbuf_cache.buffer_min_count);
  if (!bdbuf_cache.bds)
    goto error;

  /*
   * Allocate the memory for the group control blocks.
   */
  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
                               bdbuf_cache.group_count);
  if (!bdbuf_cache.groups)
    goto error;

  /*
   * Allocate memory for buffer memory. The buffer memory will be cache
   * aligned. It is possible to free the memory allocated by rtems_memalign()
   * with free(). Return 0 if allocated.
   */
  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
                      cache_aligment,
                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
    goto error;

  /*
   * The cache is empty after opening so we need to add all the buffers to it
   * and initialise the groups.
   */
  for (b = 0, group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
       b < bdbuf_cache.buffer_min_count;
       b++, bd++, buffer += bdbuf_config.buffer_min)
  {
    bd->dd    = BDBUF_INVALID_DEV;
    bd->group  = group;
    bd->buffer = buffer;

    rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);

    /* Advance to the next group after assigning max_bds_per_group BDs. */
    if ((b % bdbuf_cache.max_bds_per_group) ==
        (bdbuf_cache.max_bds_per_group - 1))
      group++;
  }

  for (b = 0,
         group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds;
       b < bdbuf_cache.group_count;
       b++,
         group++,
         bd += bdbuf_cache.max_bds_per_group)
  {
    group->bds_per_group = bdbuf_cache.max_bds_per_group;
    group->bdbuf = bd;
  }

  /*
   * Create and start swapout task. This task will create and manage the worker
   * threads.
   */
  bdbuf_cache.swapout_enabled = true;

  sc = rtems_bdbuf_create_task (rtems_build_name('B', 'S', 'W', 'P'),
                                bdbuf_config.swapout_priority,
                                RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
                                rtems_bdbuf_swapout_task,
                                0,
                                &bdbuf_cache.swapout);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  if (bdbuf_config.max_read_ahead_blocks > 0)
  {
    bdbuf_cache.read_ahead_enabled = true;
    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'R', 'D', 'A'),
                                  bdbuf_config.read_ahead_priority,
                                  RTEMS_BDBUF_READ_AHEAD_TASK_PRIORITY_DEFAULT,
                                  rtems_bdbuf_read_ahead_task,
                                  0,
                                  &bdbuf_cache.read_ahead_task);
    if (sc != RTEMS_SUCCESSFUL)
      goto error;
  }

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;

error:

  /*
   * Roll back everything created so far.  Tasks are deleted only when they
   * were created; free(NULL) is a no-op.  NOTE(review): the semaphore
   * deletes may run with id 0 for objects never created -- presumably this
   * fails harmlessly; verify against the semaphore manager.
   */
  if (bdbuf_cache.read_ahead_task != 0)
    rtems_task_delete (bdbuf_cache.read_ahead_task);

  if (bdbuf_cache.swapout != 0)
    rtems_task_delete (bdbuf_cache.swapout);

  free (bdbuf_cache.buffers);
  free (bdbuf_cache.groups);
  free (bdbuf_cache.bds);

  rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema);
  rtems_semaphore_delete (bdbuf_cache.access_waiters.sema);
  rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema);
  rtems_semaphore_delete (bdbuf_cache.sync_lock);

  if (bdbuf_cache.lock != 0)
  {
    rtems_bdbuf_unlock_cache ();
    rtems_semaphore_delete (bdbuf_cache.lock);
  }

  bdbuf_cache.initialised = false;

  return RTEMS_UNSATISFIED;
}
1535
1536static void
1537rtems_bdbuf_wait_for_event (rtems_event_set event)
1538{
1539  rtems_status_code sc = RTEMS_SUCCESSFUL;
1540  rtems_event_set   out = 0;
1541
1542  sc = rtems_event_receive (event,
1543                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1544                            RTEMS_NO_TIMEOUT,
1545                            &out);
1546
1547  if (sc != RTEMS_SUCCESSFUL || out != event)
1548    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT);
1549}
1550
1551static void
1552rtems_bdbuf_wait_for_transient_event (void)
1553{
1554  rtems_status_code sc = RTEMS_SUCCESSFUL;
1555
1556  sc = rtems_event_transient_receive (RTEMS_WAIT, RTEMS_NO_TIMEOUT);
1557  if (sc != RTEMS_SUCCESSFUL)
1558    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_WAIT_TRANS_EVNT);
1559}
1560
/**
 * Wait until the caller may access the buffer exclusively.
 *
 * A modified buffer drops its group reference and is extracted from its
 * list; a cached buffer is extracted from its list; an empty buffer needs
 * no list handling.  While another user holds the buffer (ACCESS_*) or a
 * transfer is in progress, block on the corresponding waiter resource and
 * re-check the state afterwards.
 */
static void
rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_MODIFIED:
        rtems_bdbuf_group_release (bd);
        /* Fall through */
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_chain_extract_unprotected (&bd->link);
        /* Fall through */
      case RTEMS_BDBUF_STATE_EMPTY:
        return;
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_7);
    }
  }
}
1592
/**
 * Queue a modified buffer for synchronisation: put it into the sync state,
 * move it from its current list onto the sync list and wake the swapout
 * task to perform the write.
 */
static void
rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
  rtems_chain_extract_unprotected (&bd->link);
  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
  rtems_bdbuf_wake_swapper ();
}
1601
/**
 * @brief Waits until the buffer is ready for recycling.
 *
 * @retval @c true Buffer is valid and may be recycled.
 * @retval @c false Buffer is invalid and has to be searched again.
 */
static bool
rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_FREE:
        return true;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* Queue the write; a later iteration waits for the transfer. */
        rtems_bdbuf_request_sync_for_modified_buffer (bd);
        break;
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
        if (bd->waiters == 0)
          return true;
        else
        {
          /*
           * It is essential that we wait here without a special wait count and
           * without the group in use.  Otherwise we could trigger a wait ping
           * pong with another recycle waiter.  The state of the buffer is
           * arbitrary afterwards.
           */
          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
          return false;
        }
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_8);
    }
  }
}
1651
/**
 * Block until any sync or transfer in progress on the buffer has completed.
 * Returns as soon as the buffer reaches any non-transfer state.
 */
static void
rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        return;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_9);
    }
  }
}
1677
/**
 * Wait until a buffer becomes available.  If modified buffers exist the
 * swapout task is woken first so buffers can be written out and recycled.
 */
static void
rtems_bdbuf_wait_for_buffer (void)
{
  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
    rtems_bdbuf_wake_swapper ();

  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
}
1686
/**
 * Synchronise an accessed buffer to disk and release it.
 *
 * The buffer is queued on the sync list, access waiters are woken, the
 * swapout task is kicked and we block until the write has completed.  If
 * afterwards nobody waits for the buffer and it ended up cached or empty it
 * may be recycled: an empty buffer is made free, and buffer waiters are
 * woken in both cases.
 */
static void
rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);

  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_wait_for_sync_done (bd);

  /*
   * We may have created a cached or empty buffer which may be recycled.
   */
  if (bd->waiters == 0
        && (bd->state == RTEMS_BDBUF_STATE_CACHED
          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
  {
    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
    {
      rtems_bdbuf_remove_from_tree (bd);
      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
    }
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
  }
}
1715
1716static rtems_bdbuf_buffer *
1717rtems_bdbuf_get_buffer_for_read_ahead (rtems_disk_device *dd,
1718                                       rtems_blkdev_bnum  block)
1719{
1720  rtems_bdbuf_buffer *bd = NULL;
1721
1722  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1723
1724  if (bd == NULL)
1725  {
1726    bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);
1727
1728    if (bd != NULL)
1729      rtems_bdbuf_group_obtain (bd);
1730  }
1731  else
1732    /*
1733     * The buffer is in the cache.  So it is already available or in use, and
1734     * thus no need for a read ahead.
1735     */
1736    bd = NULL;
1737
1738  return bd;
1739}
1740
/**
 * Get the buffer for the media block of the disk, looping until one is
 * obtained.
 *
 * A tree hit whose group geometry does not match the disk's is recycled (if
 * possible) and the search restarts.  On a miss a buffer is taken from the
 * LRU list, or we wait for one to become available.  The returned buffer is
 * ready for access and holds a group reference.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_for_access (rtems_disk_device *dd,
                                   rtems_blkdev_bnum  block)
{
  rtems_bdbuf_buffer *bd = NULL;

  do
  {
    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);

    if (bd != NULL)
    {
      if (bd->group->bds_per_group != dd->bds_per_group)
      {
        /* Wrong geometry: free the buffer if we can, then search again. */
        if (rtems_bdbuf_wait_for_recycle (bd))
        {
          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
        }
        bd = NULL;
      }
    }
    else
    {
      bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);

      if (bd == NULL)
        rtems_bdbuf_wait_for_buffer ();
    }
  }
  while (bd == NULL);

  rtems_bdbuf_wait_for_access (bd);
  rtems_bdbuf_group_obtain (bd);

  return bd;
}
1779
1780static rtems_status_code
1781rtems_bdbuf_get_media_block (const rtems_disk_device *dd,
1782                             rtems_blkdev_bnum        block,
1783                             rtems_blkdev_bnum       *media_block_ptr)
1784{
1785  rtems_status_code sc = RTEMS_SUCCESSFUL;
1786
1787  if (block < dd->block_count)
1788  {
1789    /*
1790     * Compute the media block number. Drivers work with media block number not
1791     * the block number a BD may have as this depends on the block size set by
1792     * the user.
1793     */
1794    *media_block_ptr = rtems_bdbuf_media_block (dd, block) + dd->start;
1795  }
1796  else
1797  {
1798    sc = RTEMS_INVALID_ID;
1799  }
1800
1801  return sc;
1802}
1803
/**
 * Get a buffer for the disk block without reading it from the media.  The
 * buffer is returned in an access state; a cache hit on a modified buffer
 * is tolerated (see comment below).  On an out-of-range block no buffer is
 * returned and the status reflects the error.
 */
rtems_status_code
rtems_bdbuf_get (rtems_disk_device   *dd,
                 rtems_blkdev_bnum    block,
                 rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code   sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block;

  rtems_bdbuf_lock_cache ();

  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    /*
     * Print the block index relative to the physical disk.
     */
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);

    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /*
         * To get a modified buffer could be considered a bug in the caller
         * because you should not be getting an already modified buffer but
         * user may have modified a byte in a block then decided to seek the
         * start and write the whole block and the file system will have no
         * record of this so just gets the block to fill.
         */
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_2);
        break;
    }

    if (rtems_bdbuf_tracer)
    {
      rtems_bdbuf_show_users ("get", bd);
      rtems_bdbuf_show_usage ();
    }
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
1863
/**
 * Call back handler called by the low level driver when the transfer has
 * completed. This function may be invoked from interrupt handlers.
 *
 * @param req The block device transfer request that completed.  Its status
 *            field is updated and the task recorded in req->io_task is
 *            woken with a transient event.
 * @param status I/O completion status
 */
static void
rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status)
{
  req->status = status;

  rtems_event_transient_send (req->io_task);
}
1880
/**
 * Execute a transfer request on the device and wait for its completion.
 *
 * The cache lock is dropped (when held) around the driver call and the lock
 * state on exit matches the state on entry.  After the driver signals
 * completion via rtems_bdbuf_transfer_done(), per-device statistics are
 * updated and every buffer of the request is released: on success a buffer
 * still in transfer state becomes cached, otherwise it is discarded.
 * Transfer and/or buffer waiters are woken as appropriate.
 *
 * @param dd The disk device.
 * @param req The transfer request.
 * @param cache_locked True if the cache lock is held on entry (and exit).
 * @retval RTEMS_SUCCESSFUL Transfer succeeded.
 * @retval RTEMS_UNSATISFIED Passed through from the driver status.
 * @retval RTEMS_IO_ERROR Any other driver failure.
 */
static rtems_status_code
rtems_bdbuf_execute_transfer_request (rtems_disk_device    *dd,
                                      rtems_blkdev_request *req,
                                      bool                  cache_locked)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  uint32_t transfer_index = 0;
  bool wake_transfer_waiters = false;
  bool wake_buffer_waiters = false;

  if (cache_locked)
    rtems_bdbuf_unlock_cache ();

  /* The return value will be ignored for transfer requests */
  dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);

  /* Wait for transfer request completion */
  rtems_bdbuf_wait_for_transient_event ();
  sc = req->status;

  rtems_bdbuf_lock_cache ();

  /* Statistics */
  if (req->req == RTEMS_BLKDEV_REQ_READ)
  {
    dd->stats.read_blocks += req->bufnum;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.read_errors;
  }
  else
  {
    dd->stats.write_blocks += req->bufnum;
    ++dd->stats.write_transfers;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.write_errors;
  }

  /* Release every buffer of the request and settle its final state. */
  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
  {
    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
    bool waiters = bd->waiters;

    if (waiters)
      wake_transfer_waiters = true;
    else
      wake_buffer_waiters = true;

    rtems_bdbuf_group_release (bd);

    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
    else
      rtems_bdbuf_discard_buffer (bd);

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("transfer", bd);
  }

  if (wake_transfer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);

  if (wake_buffer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  if (!cache_locked)
    rtems_bdbuf_unlock_cache ();

  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
    return sc;
  else
    return RTEMS_IO_ERROR;
}
1953
/**
 * Execute a read for the given buffer's block and up to transfer_count - 1
 * following blocks.  Extra blocks are included only while buffers for read
 * ahead can be obtained from the cache.  The request structure is allocated
 * on the stack via alloca and handed to
 * rtems_bdbuf_execute_transfer_request() with the cache lock held.
 *
 * @param dd The disk device.
 * @param bd The buffer for the first block; already set up for access.
 * @param transfer_count Maximum number of consecutive blocks to read.
 * @return The transfer status (see rtems_bdbuf_execute_transfer_request()).
 */
static rtems_status_code
rtems_bdbuf_execute_read_request (rtems_disk_device  *dd,
                                  rtems_bdbuf_buffer *bd,
                                  uint32_t            transfer_count)
{
  rtems_blkdev_request *req = NULL;
  rtems_blkdev_bnum media_block = bd->block;
  uint32_t media_blocks_per_block = dd->media_blocks_per_block;
  uint32_t block_size = dd->block_size;
  uint32_t transfer_index = 1;

  /*
   * TODO: This type of request structure is wrong and should be removed.
   */
#define bdbuf_alloc(size) __builtin_alloca (size)

  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
                     sizeof (rtems_blkdev_sg_buffer) * transfer_count);

  req->req = RTEMS_BLKDEV_REQ_READ;
  req->done = rtems_bdbuf_transfer_done;
  req->io_task = rtems_task_self ();
  req->bufnum = 0;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

  req->bufs [0].user   = bd;
  req->bufs [0].block  = media_block;
  req->bufs [0].length = block_size;
  req->bufs [0].buffer = bd->buffer;

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_users ("read", bd);

  /* Append consecutive blocks while read-ahead buffers are available. */
  while (transfer_index < transfer_count)
  {
    media_block += media_blocks_per_block;

    bd = rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

    if (bd == NULL)
      break;

    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

    req->bufs [transfer_index].user   = bd;
    req->bufs [transfer_index].block  = media_block;
    req->bufs [transfer_index].length = block_size;
    req->bufs [transfer_index].buffer = bd->buffer;

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("read", bd);

    ++transfer_index;
  }

  req->bufnum = transfer_index;

  return rtems_bdbuf_execute_transfer_request (dd, req, true);
}
2014
/**
 * A disk's read ahead is active while its node is on the read-ahead chain.
 */
static bool
rtems_bdbuf_is_read_ahead_active (const rtems_disk_device *dd)
{
  return !rtems_chain_is_node_off_chain (&dd->read_ahead.node);
}
2020
2021static void
2022rtems_bdbuf_read_ahead_cancel (rtems_disk_device *dd)
2023{
2024  if (rtems_bdbuf_is_read_ahead_active (dd))
2025  {
2026    rtems_chain_extract_unprotected (&dd->read_ahead.node);
2027    rtems_chain_set_off_chain (&dd->read_ahead.node);
2028  }
2029}
2030
/**
 * Cancel any queued read ahead for the disk and clear its trigger so no new
 * read ahead starts until a trigger is set again.
 */
static void
rtems_bdbuf_read_ahead_reset (rtems_disk_device *dd)
{
  rtems_bdbuf_read_ahead_cancel (dd);
  dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
}
2037
/**
 * If the accessed block hits the disk's read-ahead trigger and no read
 * ahead is already queued for it, append the disk to the read-ahead chain.
 * The read-ahead task is only woken when the chain was empty, since a
 * non-empty chain means the task still has work pending.
 */
static void
rtems_bdbuf_check_read_ahead_trigger (rtems_disk_device *dd,
                                      rtems_blkdev_bnum  block)
{
  if (bdbuf_cache.read_ahead_task != 0
      && dd->read_ahead.trigger == block
      && !rtems_bdbuf_is_read_ahead_active (dd))
  {
    rtems_status_code sc;
    rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;

    if (rtems_chain_is_empty (chain))
    {
      sc = rtems_event_send (bdbuf_cache.read_ahead_task,
                             RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
      if (sc != RTEMS_SUCCESSFUL)
        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RA_WAKE_UP);
    }

    rtems_chain_append_unprotected (chain, &dd->read_ahead.node);
  }
}
2060
2061static void
2062rtems_bdbuf_set_read_ahead_trigger (rtems_disk_device *dd,
2063                                    rtems_blkdev_bnum  block)
2064{
2065  if (dd->read_ahead.trigger != block)
2066  {
2067    rtems_bdbuf_read_ahead_cancel (dd);
2068    dd->read_ahead.trigger = block + 1;
2069    dd->read_ahead.next = block + 2;
2070  }
2071}
2072
/**
 * Get a buffer for the block and make sure it holds valid data.
 *
 * Translates the logical block to a media block, obtains the buffer with
 * the cache locked, and, depending on the buffer state, either reuses the
 * cached/modified contents or issues a single-block read. Also maintains
 * the per-device read-ahead trigger on every access.
 *
 * @param dd The disk device to read from.
 * @param block The logical block number.
 * @param bd_ptr Receives the buffer, or NULL on failure.
 * @return RTEMS_SUCCESSFUL or the status of the media-block translation or
 *         read transfer.
 */
rtems_status_code
rtems_bdbuf_read (rtems_disk_device   *dd,
                  rtems_blkdev_bnum    block,
                  rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code     sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer   *bd = NULL;
  rtems_blkdev_bnum     media_block;

  rtems_bdbuf_lock_cache ();

  /* Translate and bounds-check the logical block for this device. */
  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block + dd->start, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        /* Hit: data already valid, mark as accessed for read. */
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* Hit on dirty data: keep its modified character while accessed. */
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        /* Miss: arm read-ahead and fetch the single block from the media. */
        ++dd->stats.read_misses;
        rtems_bdbuf_set_read_ahead_trigger (dd, block);
        sc = rtems_bdbuf_execute_read_request (dd, bd, 1);
        if (sc == RTEMS_SUCCESSFUL)
        {
          rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
          rtems_chain_extract_unprotected (&bd->link);
          rtems_bdbuf_group_obtain (bd);
        }
        else
        {
          /* Transfer failed: report no buffer to the caller. */
          bd = NULL;
        }
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_4);
        break;
    }

    /* Fire a queued read-ahead if this access hit the trigger block. */
    rtems_bdbuf_check_read_ahead_trigger (dd, block);
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
2131
2132static rtems_status_code
2133rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
2134{
2135  if (bd == NULL)
2136    return RTEMS_INVALID_ADDRESS;
2137  if (rtems_bdbuf_tracer)
2138  {
2139    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
2140    rtems_bdbuf_show_users (kind, bd);
2141  }
2142  rtems_bdbuf_lock_cache();
2143
2144  return RTEMS_SUCCESSFUL;
2145}
2146
2147rtems_status_code
2148rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2149{
2150  rtems_status_code sc = RTEMS_SUCCESSFUL;
2151
2152  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2153  if (sc != RTEMS_SUCCESSFUL)
2154    return sc;
2155
2156  switch (bd->state)
2157  {
2158    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2159      rtems_bdbuf_add_to_lru_list_after_access (bd);
2160      break;
2161    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2162    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2163      rtems_bdbuf_discard_buffer_after_access (bd);
2164      break;
2165    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2166      rtems_bdbuf_add_to_modified_list_after_access (bd);
2167      break;
2168    default:
2169      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_0);
2170      break;
2171  }
2172
2173  if (rtems_bdbuf_tracer)
2174    rtems_bdbuf_show_usage ();
2175
2176  rtems_bdbuf_unlock_cache ();
2177
2178  return RTEMS_SUCCESSFUL;
2179}
2180
2181rtems_status_code
2182rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2183{
2184  rtems_status_code sc = RTEMS_SUCCESSFUL;
2185
2186  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2187  if (sc != RTEMS_SUCCESSFUL)
2188    return sc;
2189
2190  switch (bd->state)
2191  {
2192    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2193    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2194    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2195      rtems_bdbuf_add_to_modified_list_after_access (bd);
2196      break;
2197    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2198      rtems_bdbuf_discard_buffer_after_access (bd);
2199      break;
2200    default:
2201      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_6);
2202      break;
2203  }
2204
2205  if (rtems_bdbuf_tracer)
2206    rtems_bdbuf_show_usage ();
2207
2208  rtems_bdbuf_unlock_cache ();
2209
2210  return RTEMS_SUCCESSFUL;
2211}
2212
2213rtems_status_code
2214rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
2215{
2216  rtems_status_code sc = RTEMS_SUCCESSFUL;
2217
2218  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2219  if (sc != RTEMS_SUCCESSFUL)
2220    return sc;
2221
2222  switch (bd->state)
2223  {
2224    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2225    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2226    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2227      rtems_bdbuf_sync_after_access (bd);
2228      break;
2229    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2230      rtems_bdbuf_discard_buffer_after_access (bd);
2231      break;
2232    default:
2233      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_5);
2234      break;
2235  }
2236
2237  if (rtems_bdbuf_tracer)
2238    rtems_bdbuf_show_usage ();
2239
2240  rtems_bdbuf_unlock_cache ();
2241
2242  return RTEMS_SUCCESSFUL;
2243}
2244
/**
 * Synchronize all modified buffers of one device to the media.
 *
 * Serializes whole-device syncs via the sync lock, hands the work to the
 * swapout task and blocks until that task signals completion with a
 * transient event.
 *
 * @param dd The disk device to synchronize.
 * @return RTEMS_SUCCESSFUL always; transfer errors are handled by the
 *         swapout machinery.
 */
rtems_status_code
rtems_bdbuf_syncdev (rtems_disk_device *dd)
{
  if (rtems_bdbuf_tracer)
    printf ("bdbuf:syncdev: %08x\n", (unsigned) dd->dev);

  /*
   * Take the sync lock before locking the cache. Once we have the sync lock we
   * can lock the cache. If another thread has the sync lock it will cause this
   * thread to block until it owns the sync lock then it can own the cache. The
   * sync lock can only be obtained with the cache unlocked.
   */
  rtems_bdbuf_lock_sync ();
  rtems_bdbuf_lock_cache ();

  /*
   * Set the cache to have a sync active for a specific device and let the swap
   * out task know the id of the requester to wake when done.
   *
   * The swap out task will negate the sync active flag when no more buffers
   * for the device are held on the "modified for sync" queues.
   */
  bdbuf_cache.sync_active    = true;
  bdbuf_cache.sync_requester = rtems_task_self ();
  bdbuf_cache.sync_device    = dd;

  /* Kick the swapout task, then wait (with the cache unlocked) for it to
   * signal that all buffers of this device have been written. */
  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_unlock_cache ();
  rtems_bdbuf_wait_for_transient_event ();
  rtems_bdbuf_unlock_sync ();

  return RTEMS_SUCCESSFUL;
}
2278
2279/**
2280 * Swapout transfer to the driver. The driver will break this I/O into groups
2281 * of consecutive write requests is multiple consecutive buffers are required
2282 * by the driver. The cache is not locked.
2283 *
2284 * @param transfer The transfer transaction.
2285 */
2286static void
2287rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2288{
2289  rtems_chain_node *node;
2290
2291  if (rtems_bdbuf_tracer)
2292    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dd->dev);
2293
2294  /*
2295   * If there are buffers to transfer to the media transfer them.
2296   */
2297  if (!rtems_chain_is_empty (&transfer->bds))
2298  {
2299    /*
2300     * The last block number used when the driver only supports
2301     * continuous blocks in a single request.
2302     */
2303    uint32_t last_block = 0;
2304
2305    rtems_disk_device *dd = transfer->dd;
2306    uint32_t media_blocks_per_block = dd->media_blocks_per_block;
2307    bool need_continuous_blocks =
2308      (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) != 0;
2309
2310    /*
2311     * Take as many buffers as configured and pass to the driver. Note, the
2312     * API to the drivers has an array of buffers and if a chain was passed
2313     * we could have just passed the list. If the driver API is updated it
2314     * should be possible to make this change with little effect in this
2315     * code. The array that is passed is broken in design and should be
2316     * removed. Merging members of a struct into the first member is
2317     * trouble waiting to happen.
2318     */
2319    transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2320    transfer->write_req->bufnum = 0;
2321
2322    while ((node = rtems_chain_get_unprotected(&transfer->bds)) != NULL)
2323    {
2324      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2325      bool                write = false;
2326
2327      /*
2328       * If the device only accepts sequential buffers and this is not the
2329       * first buffer (the first is always sequential, and the buffer is not
2330       * sequential then put the buffer back on the transfer chain and write
2331       * the committed buffers.
2332       */
2333
2334      if (rtems_bdbuf_tracer)
2335        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2336                bd->block, transfer->write_req->bufnum,
2337                need_continuous_blocks ? "MULTI" : "SCAT");
2338
2339      if (need_continuous_blocks && transfer->write_req->bufnum &&
2340          bd->block != last_block + media_blocks_per_block)
2341      {
2342        rtems_chain_prepend_unprotected (&transfer->bds, &bd->link);
2343        write = true;
2344      }
2345      else
2346      {
2347        rtems_blkdev_sg_buffer* buf;
2348        buf = &transfer->write_req->bufs[transfer->write_req->bufnum];
2349        transfer->write_req->bufnum++;
2350        buf->user   = bd;
2351        buf->block  = bd->block;
2352        buf->length = dd->block_size;
2353        buf->buffer = bd->buffer;
2354        last_block  = bd->block;
2355      }
2356
2357      /*
2358       * Perform the transfer if there are no more buffers, or the transfer
2359       * size has reached the configured max. value.
2360       */
2361
2362      if (rtems_chain_is_empty (&transfer->bds) ||
2363          (transfer->write_req->bufnum >= bdbuf_config.max_write_blocks))
2364        write = true;
2365
2366      if (write)
2367      {
2368        rtems_bdbuf_execute_transfer_request (dd, transfer->write_req, false);
2369
2370        transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2371        transfer->write_req->bufnum = 0;
2372      }
2373    }
2374
2375    /*
2376     * If sync'ing and the deivce is capability of handling a sync IO control
2377     * call perform the call.
2378     */
2379    if (transfer->syncing &&
2380        (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
2381    {
2382      /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
2383      /* How should the error be handled ? */
2384    }
2385  }
2386}
2387
2388/**
2389 * Process the modified list of buffers. There is a sync or modified list that
2390 * needs to be handled so we have a common function to do the work.
2391 *
2392 * @param dd_ptr Pointer to the device to handle. If BDBUF_INVALID_DEV no
2393 * device is selected so select the device of the first buffer to be written to
2394 * disk.
2395 * @param chain The modified chain to process.
2396 * @param transfer The chain to append buffers to be written too.
2397 * @param sync_active If true this is a sync operation so expire all timers.
2398 * @param update_timers If true update the timers.
2399 * @param timer_delta It update_timers is true update the timers by this
2400 *                    amount.
2401 */
2402static void
2403rtems_bdbuf_swapout_modified_processing (rtems_disk_device  **dd_ptr,
2404                                         rtems_chain_control* chain,
2405                                         rtems_chain_control* transfer,
2406                                         bool                 sync_active,
2407                                         bool                 update_timers,
2408                                         uint32_t             timer_delta)
2409{
2410  if (!rtems_chain_is_empty (chain))
2411  {
2412    rtems_chain_node* node = rtems_chain_head (chain);
2413    bool              sync_all;
2414   
2415    node = node->next;
2416
2417    /*
2418     * A sync active with no valid dev means sync all.
2419     */
2420    if (sync_active && (*dd_ptr == BDBUF_INVALID_DEV))
2421      sync_all = true;
2422    else
2423      sync_all = false;
2424   
2425    while (!rtems_chain_is_tail (chain, node))
2426    {
2427      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2428
2429      /*
2430       * Check if the buffer's hold timer has reached 0. If a sync is active
2431       * or someone waits for a buffer written force all the timers to 0.
2432       *
2433       * @note Lots of sync requests will skew this timer. It should be based
2434       *       on TOD to be accurate. Does it matter ?
2435       */
2436      if (sync_all || (sync_active && (*dd_ptr == bd->dd))
2437          || rtems_bdbuf_has_buffer_waiters ())
2438        bd->hold_timer = 0;
2439
2440      if (bd->hold_timer)
2441      {
2442        if (update_timers)
2443        {
2444          if (bd->hold_timer > timer_delta)
2445            bd->hold_timer -= timer_delta;
2446          else
2447            bd->hold_timer = 0;
2448        }
2449
2450        if (bd->hold_timer)
2451        {
2452          node = node->next;
2453          continue;
2454        }
2455      }
2456
2457      /*
2458       * This assumes we can set it to BDBUF_INVALID_DEV which is just an
2459       * assumption. Cannot use the transfer list being empty the sync dev
2460       * calls sets the dev to use.
2461       */
2462      if (*dd_ptr == BDBUF_INVALID_DEV)
2463        *dd_ptr = bd->dd;
2464
2465      if (bd->dd == *dd_ptr)
2466      {
2467        rtems_chain_node* next_node = node->next;
2468        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2469
2470        /*
2471         * The blocks on the transfer list are sorted in block order. This
2472         * means multi-block transfers for drivers that require consecutive
2473         * blocks perform better with sorted blocks and for real disks it may
2474         * help lower head movement.
2475         */
2476
2477        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2478
2479        rtems_chain_extract_unprotected (node);
2480
2481        tnode = tnode->previous;
2482
2483        while (node && !rtems_chain_is_head (transfer, tnode))
2484        {
2485          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2486
2487          if (bd->block > tbd->block)
2488          {
2489            rtems_chain_insert_unprotected (tnode, node);
2490            node = NULL;
2491          }
2492          else
2493            tnode = tnode->previous;
2494        }
2495
2496        if (node)
2497          rtems_chain_prepend_unprotected (transfer, node);
2498
2499        node = next_node;
2500      }
2501      else
2502      {
2503        node = node->next;
2504      }
2505    }
2506  }
2507}
2508
2509/**
2510 * Process the cache's modified buffers. Check the sync list first then the
2511 * modified list extracting the buffers suitable to be written to disk. We have
2512 * a device at a time. The task level loop will repeat this operation while
2513 * there are buffers to be written. If the transfer fails place the buffers
2514 * back on the modified list and try again later. The cache is unlocked while
2515 * the buffers are being written to disk.
2516 *
2517 * @param timer_delta It update_timers is true update the timers by this
2518 *                    amount.
2519 * @param update_timers If true update the timers.
2520 * @param transfer The transfer transaction data.
2521 *
2522 * @retval true Buffers where written to disk so scan again.
2523 * @retval false No buffers where written to disk.
2524 */
2525static bool
2526rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2527                                bool                          update_timers,
2528                                rtems_bdbuf_swapout_transfer* transfer)
2529{
2530  rtems_bdbuf_swapout_worker* worker;
2531  bool                        transfered_buffers = false;
2532
2533  rtems_bdbuf_lock_cache ();
2534
2535  /*
2536   * If a sync is active do not use a worker because the current code does not
2537   * cleaning up after. We need to know the buffers have been written when
2538   * syncing to release sync lock and currently worker threads do not return to
2539   * here. We do not know the worker is the last in a sequence of sync writes
2540   * until after we have it running so we do not know to tell it to release the
2541   * lock. The simplest solution is to get the main swap out task perform all
2542   * sync operations.
2543   */
2544  if (bdbuf_cache.sync_active)
2545    worker = NULL;
2546  else
2547  {
2548    worker = (rtems_bdbuf_swapout_worker*)
2549      rtems_chain_get_unprotected (&bdbuf_cache.swapout_workers);
2550    if (worker)
2551      transfer = &worker->transfer;
2552  }
2553
2554  rtems_chain_initialize_empty (&transfer->bds);
2555  transfer->dd = BDBUF_INVALID_DEV;
2556  transfer->syncing = bdbuf_cache.sync_active;
2557 
2558  /*
2559   * When the sync is for a device limit the sync to that device. If the sync
2560   * is for a buffer handle process the devices in the order on the sync
2561   * list. This means the dev is BDBUF_INVALID_DEV.
2562   */
2563  if (bdbuf_cache.sync_active)
2564    transfer->dd = bdbuf_cache.sync_device;
2565   
2566  /*
2567   * If we have any buffers in the sync queue move them to the modified
2568   * list. The first sync buffer will select the device we use.
2569   */
2570  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2571                                           &bdbuf_cache.sync,
2572                                           &transfer->bds,
2573                                           true, false,
2574                                           timer_delta);
2575
2576  /*
2577   * Process the cache's modified list.
2578   */
2579  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2580                                           &bdbuf_cache.modified,
2581                                           &transfer->bds,
2582                                           bdbuf_cache.sync_active,
2583                                           update_timers,
2584                                           timer_delta);
2585
2586  /*
2587   * We have all the buffers that have been modified for this device so the
2588   * cache can be unlocked because the state of each buffer has been set to
2589   * TRANSFER.
2590   */
2591  rtems_bdbuf_unlock_cache ();
2592
2593  /*
2594   * If there are buffers to transfer to the media transfer them.
2595   */
2596  if (!rtems_chain_is_empty (&transfer->bds))
2597  {
2598    if (worker)
2599    {
2600      rtems_status_code sc = rtems_event_send (worker->id,
2601                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2602      if (sc != RTEMS_SUCCESSFUL)
2603        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
2604    }
2605    else
2606    {
2607      rtems_bdbuf_swapout_write (transfer);
2608    }
2609
2610    transfered_buffers = true;
2611  }
2612
2613  if (bdbuf_cache.sync_active && !transfered_buffers)
2614  {
2615    rtems_id sync_requester;
2616    rtems_bdbuf_lock_cache ();
2617    sync_requester = bdbuf_cache.sync_requester;
2618    bdbuf_cache.sync_active = false;
2619    bdbuf_cache.sync_requester = 0;
2620    rtems_bdbuf_unlock_cache ();
2621    if (sync_requester)
2622      rtems_event_transient_send (sync_requester);
2623  }
2624
2625  return transfered_buffers;
2626}
2627
2628/**
2629 * Allocate the write request and initialise it for good measure.
2630 *
2631 * @return rtems_blkdev_request* The write reference memory.
2632 */
2633static rtems_blkdev_request*
2634rtems_bdbuf_swapout_writereq_alloc (void)
2635{
2636  /*
2637   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
2638   * I am disappointment at finding code like this in RTEMS. The request should
2639   * have been a rtems_chain_control. Simple, fast and less storage as the node
2640   * is already part of the buffer structure.
2641   */
2642  rtems_blkdev_request* write_req =
2643    malloc (sizeof (rtems_blkdev_request) +
2644            (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer)));
2645
2646  if (!write_req)
2647    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2648
2649  write_req->req = RTEMS_BLKDEV_REQ_WRITE;
2650  write_req->done = rtems_bdbuf_transfer_done;
2651  write_req->io_task = rtems_task_self ();
2652
2653  return write_req;
2654}
2655
2656/**
2657 * The swapout worker thread body.
2658 *
2659 * @param arg A pointer to the worker thread's private data.
2660 * @return rtems_task Not used.
2661 */
2662static rtems_task
2663rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2664{
2665  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2666
2667  while (worker->enabled)
2668  {
2669    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2670
2671    rtems_bdbuf_swapout_write (&worker->transfer);
2672
2673    rtems_bdbuf_lock_cache ();
2674
2675    rtems_chain_initialize_empty (&worker->transfer.bds);
2676    worker->transfer.dd = BDBUF_INVALID_DEV;
2677
2678    rtems_chain_append_unprotected (&bdbuf_cache.swapout_workers, &worker->link);
2679
2680    rtems_bdbuf_unlock_cache ();
2681  }
2682
2683  free (worker->transfer.write_req);
2684  free (worker);
2685
2686  rtems_task_delete (RTEMS_SELF);
2687}
2688
2689/**
2690 * Open the swapout worker threads.
2691 */
2692static void
2693rtems_bdbuf_swapout_workers_open (void)
2694{
2695  rtems_status_code sc;
2696  size_t            w;
2697
2698  rtems_bdbuf_lock_cache ();
2699
2700  for (w = 0; w < bdbuf_config.swapout_workers; w++)
2701  {
2702    rtems_bdbuf_swapout_worker* worker;
2703
2704    worker = malloc (sizeof (rtems_bdbuf_swapout_worker));
2705    if (!worker)
2706      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2707
2708    rtems_chain_append_unprotected (&bdbuf_cache.swapout_workers, &worker->link);
2709    worker->enabled = true;
2710    worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2711
2712    rtems_chain_initialize_empty (&worker->transfer.bds);
2713    worker->transfer.dd = BDBUF_INVALID_DEV;
2714
2715    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'D', 'o', 'a' + w),
2716                                  bdbuf_config.swapout_worker_priority,
2717                                  RTEMS_BDBUF_SWAPOUT_WORKER_TASK_PRIORITY_DEFAULT,
2718                                  rtems_bdbuf_swapout_worker_task,
2719                                  (rtems_task_argument) worker,
2720                                  &worker->id);
2721    if (sc != RTEMS_SUCCESSFUL)
2722      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE);
2723  }
2724
2725  rtems_bdbuf_unlock_cache ();
2726}
2727
2728/**
2729 * Close the swapout worker threads.
2730 */
2731static void
2732rtems_bdbuf_swapout_workers_close (void)
2733{
2734  rtems_chain_node* node;
2735
2736  rtems_bdbuf_lock_cache ();
2737
2738  node = rtems_chain_first (&bdbuf_cache.swapout_workers);
2739  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node))
2740  {
2741    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2742    worker->enabled = false;
2743    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2744    node = rtems_chain_next (node);
2745  }
2746
2747  rtems_bdbuf_unlock_cache ();
2748}
2749
2750/**
2751 * Body of task which takes care on flushing modified buffers to the disk.
2752 *
2753 * @param arg A pointer to the global cache data. Use the global variable and
2754 *            not this.
2755 * @return rtems_task Not used.
2756 */
2757static rtems_task
2758rtems_bdbuf_swapout_task (rtems_task_argument arg)
2759{
2760  rtems_bdbuf_swapout_transfer transfer;
2761  uint32_t                     period_in_ticks;
2762  const uint32_t               period_in_msecs = bdbuf_config.swapout_period;
2763  uint32_t                     timer_delta;
2764
2765  transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2766  rtems_chain_initialize_empty (&transfer.bds);
2767  transfer.dd = BDBUF_INVALID_DEV;
2768  transfer.syncing = false;
2769
2770  /*
2771   * Localise the period.
2772   */
2773  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2774
2775  /*
2776   * This is temporary. Needs to be changed to use the real time clock.
2777   */
2778  timer_delta = period_in_msecs;
2779
2780  /*
2781   * Create the worker threads.
2782   */
2783  rtems_bdbuf_swapout_workers_open ();
2784
2785  while (bdbuf_cache.swapout_enabled)
2786  {
2787    rtems_event_set   out;
2788    rtems_status_code sc;
2789
2790    /*
2791     * Only update the timers once in the processing cycle.
2792     */
2793    bool update_timers = true;
2794
2795    /*
2796     * If we write buffers to any disk perform a check again. We only write a
2797     * single device at a time and the cache may have more than one device's
2798     * buffers modified waiting to be written.
2799     */
2800    bool transfered_buffers;
2801
2802    do
2803    {
2804      transfered_buffers = false;
2805
2806      /*
2807       * Extact all the buffers we find for a specific device. The device is
2808       * the first one we find on a modified list. Process the sync queue of
2809       * buffers first.
2810       */
2811      if (rtems_bdbuf_swapout_processing (timer_delta,
2812                                          update_timers,
2813                                          &transfer))
2814      {
2815        transfered_buffers = true;
2816      }
2817
2818      /*
2819       * Only update the timers once.
2820       */
2821      update_timers = false;
2822    }
2823    while (transfered_buffers);
2824
2825    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2826                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2827                              period_in_ticks,
2828                              &out);
2829
2830    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2831      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2832  }
2833
2834  rtems_bdbuf_swapout_workers_close ();
2835
2836  free (transfer.write_req);
2837
2838  rtems_task_delete (RTEMS_SELF);
2839}
2840
2841static void
2842rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2843{
2844  bool wake_buffer_waiters = false;
2845  rtems_chain_node *node = NULL;
2846
2847  while ((node = rtems_chain_get_unprotected (purge_list)) != NULL)
2848  {
2849    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
2850
2851    if (bd->waiters == 0)
2852      wake_buffer_waiters = true;
2853
2854    rtems_bdbuf_discard_buffer (bd);
2855  }
2856
2857  if (wake_buffer_waiters)
2858    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2859}
2860
/**
 * Collect all buffers of a device that can be purged right now.
 *
 * Walks the whole AVL tree iteratively (pre-order, using an explicit
 * parent stack bounded by the maximum tree height) and for each buffer of
 * the device either moves it onto the purge list or marks it so the owner
 * of the buffer performs the purge when it is done with it. The cache must
 * be locked by the caller.
 *
 * @param purge_list Receives the buffers that can be discarded immediately.
 * @param dd The disk device being purged.
 */
static void
rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
                              const rtems_disk_device *dd)
{
  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer **prev = stack;
  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;

  /* Stack sentinel: NULL marks the bottom. */
  *prev = NULL;

  while (cur != NULL)
  {
    if (cur->dd == dd)
    {
      switch (cur->state)
      {
        case RTEMS_BDBUF_STATE_FREE:
        case RTEMS_BDBUF_STATE_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
          /* Nothing to do: already free, empty or marked for purge. */
          break;
        case RTEMS_BDBUF_STATE_SYNC:
          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
          /* Fall through */
        case RTEMS_BDBUF_STATE_MODIFIED:
          rtems_bdbuf_group_release (cur);
          /* Fall through */
        case RTEMS_BDBUF_STATE_CACHED:
          /* Idle buffer: move it straight onto the purge list. */
          rtems_chain_extract_unprotected (&cur->link);
          rtems_chain_append_unprotected (purge_list, &cur->link);
          break;
        case RTEMS_BDBUF_STATE_TRANSFER:
          /* In flight: the transfer completion will do the purge. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
          break;
        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
          /* In use: the release path will do the purge. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
          break;
        default:
          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_STATE_11);
      }
    }

    if (cur->avl.left != NULL)
    {
      /* Left */
      ++prev;
      *prev = cur;
      cur = cur->avl.left;
    }
    else if (cur->avl.right != NULL)
    {
      /* Right */
      ++prev;
      *prev = cur;
      cur = cur->avl.right;
    }
    else
    {
      /* Leaf reached: unwind to the first ancestor with an unvisited
       * right subtree. */
      while (*prev != NULL
             && (cur == (*prev)->avl.right || (*prev)->avl.right == NULL))
      {
        /* Up */
        cur = *prev;
        --prev;
      }
      if (*prev != NULL)
        /* Right */
        cur = (*prev)->avl.right;
      else
        /* Finished */
        cur = NULL;
    }
  }
}
2937
2938void
2939rtems_bdbuf_purge_dev (rtems_disk_device *dd)
2940{
2941  rtems_chain_control purge_list;
2942
2943  rtems_chain_initialize_empty (&purge_list);
2944  rtems_bdbuf_lock_cache ();
2945  rtems_bdbuf_read_ahead_reset (dd);
2946  rtems_bdbuf_gather_for_purge (&purge_list, dd);
2947  rtems_bdbuf_purge_list (&purge_list);
2948  rtems_bdbuf_unlock_cache ();
2949}
2950
2951rtems_status_code
2952rtems_bdbuf_set_block_size (rtems_disk_device *dd,
2953                            uint32_t           block_size,
2954                            bool               sync)
2955{
2956  rtems_status_code sc = RTEMS_SUCCESSFUL;
2957
2958  /*
2959   * We do not care about the synchronization status since we will purge the
2960   * device later.
2961   */
2962  if (sync)
2963    rtems_bdbuf_syncdev (dd);
2964
2965  rtems_bdbuf_lock_cache ();
2966
2967  if (block_size > 0)
2968  {
2969    size_t bds_per_group = rtems_bdbuf_bds_per_group (block_size);
2970
2971    if (bds_per_group != 0)
2972    {
2973      int block_to_media_block_shift = 0;
2974      uint32_t media_blocks_per_block = block_size / dd->media_block_size;
2975      uint32_t one = 1;
2976
2977      while ((one << block_to_media_block_shift) < media_blocks_per_block)
2978      {
2979        ++block_to_media_block_shift;
2980      }
2981
2982      if ((dd->media_block_size << block_to_media_block_shift) != block_size)
2983        block_to_media_block_shift = -1;
2984
2985      dd->block_size = block_size;
2986      dd->block_count = dd->size / media_blocks_per_block;
2987      dd->media_blocks_per_block = media_blocks_per_block;
2988      dd->block_to_media_block_shift = block_to_media_block_shift;
2989      dd->bds_per_group = bds_per_group;
2990
2991      rtems_bdbuf_purge_dev (dd);
2992    }
2993    else
2994    {
2995      sc = RTEMS_INVALID_NUMBER;
2996    }
2997  }
2998  else
2999  {
3000    sc = RTEMS_INVALID_NUMBER;
3001  }
3002
3003  rtems_bdbuf_unlock_cache ();
3004
3005  return sc;
3006}
3007
/**
 * Body of the read-ahead task.
 *
 * Waits for a wake-up event and then drains the read-ahead chain: for each
 * queued disk it reads up to the configured maximum number of blocks ahead
 * and, when a full-size read-ahead was possible, re-arms the trigger so
 * sequential readers keep the pipeline going.
 *
 * @param arg Not used.
 * @return rtems_task Not used.
 */
static rtems_task
rtems_bdbuf_read_ahead_task (rtems_task_argument arg)
{
  rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;

  while (bdbuf_cache.read_ahead_enabled)
  {
    rtems_chain_node *node;

    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
    rtems_bdbuf_lock_cache ();

    while ((node = rtems_chain_get_unprotected (chain)) != NULL)
    {
      /* Recover the disk from its embedded read-ahead chain node. */
      rtems_disk_device *dd = (rtems_disk_device *)
        ((char *) node - offsetof (rtems_disk_device, read_ahead.node));
      rtems_blkdev_bnum block = dd->read_ahead.next;
      rtems_blkdev_bnum media_block = 0;
      rtems_status_code sc =
        rtems_bdbuf_get_media_block (dd, block, &media_block);

      /* Mark the disk as no longer queued before processing it. */
      rtems_chain_set_off_chain (&dd->read_ahead.node);

      if (sc == RTEMS_SUCCESSFUL)
      {
        rtems_bdbuf_buffer *bd =
          rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

        if (bd != NULL)
        {
          /* Clamp the transfer to the remaining blocks on the disk. */
          uint32_t transfer_count = dd->block_count - block;
          uint32_t max_transfer_count = bdbuf_config.max_read_ahead_blocks;

          if (transfer_count >= max_transfer_count)
          {
            /* Full-size read-ahead: re-arm the trigger halfway through the
             * new window so the next read-ahead overlaps consumption. */
            transfer_count = max_transfer_count;
            dd->read_ahead.trigger = block + transfer_count / 2;
            dd->read_ahead.next = block + transfer_count;
          }
          else
          {
            /* End of the disk reached: stop the read-ahead sequence. */
            dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
          }

          ++dd->stats.read_ahead_transfers;
          rtems_bdbuf_execute_read_request (dd, bd, transfer_count);
        }
      }
      else
      {
        /* Next block is out of range: disarm the trigger. */
        dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
      }
    }

    rtems_bdbuf_unlock_cache ();
  }

  rtems_task_delete (RTEMS_SELF);
}
3067
3068void rtems_bdbuf_get_device_stats (const rtems_disk_device *dd,
3069                                   rtems_blkdev_stats      *stats)
3070{
3071  rtems_bdbuf_lock_cache ();
3072  *stats = dd->stats;
3073  rtems_bdbuf_unlock_cache ();
3074}
3075
3076void rtems_bdbuf_reset_device_stats (rtems_disk_device *dd)
3077{
3078  rtems_bdbuf_lock_cache ();
3079  memset (&dd->stats, 0, sizeof(dd->stats));
3080  rtems_bdbuf_unlock_cache ();
3081}
Note: See TracBrowser for help on using the repository browser.