source: rtems/cpukit/libblock/src/bdbuf.c @ 1c554014

4.115
Last change on this file since 1c554014 was 1c554014, checked in by Ralf Corsépius <ralf.corsepius@…>, on 07/19/12 at 14:14:53

Remove CVS-Ids.

  • Property mode set to 100644
File size: 83.6 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (c) 2009-2012 embedded brains GmbH.
23 */
24
25/**
26 * Set to 1 to enable debug tracing.
27 */
28#define RTEMS_BDBUF_TRACE 0
29
30#if HAVE_CONFIG_H
31#include "config.h"
32#endif
33#include <limits.h>
34#include <errno.h>
35#include <stdio.h>
36#include <string.h>
37#include <inttypes.h>
38
39#include <rtems.h>
40#include <rtems/error.h>
41#include <rtems/malloc.h>
42
43#include "rtems/bdbuf.h"
44
45#define BDBUF_INVALID_DEV NULL
46
47/*
48 * Simpler label for this file.
49 */
50#define bdbuf_config rtems_bdbuf_configuration
51
/**
 * A swapout transfer transaction data. This data is passed to a worker thread
 * to handle the write phase of the transfer.
 */
typedef struct rtems_bdbuf_swapout_transfer
{
  rtems_chain_control   bds;         /**< The transfer list of BDs. */
  rtems_disk_device    *dd;          /**< The device the transfer is for. */
  bool                  syncing;     /**< The data is a sync'ing. */
  rtems_blkdev_request* write_req;   /**< The write request array. */
} rtems_bdbuf_swapout_transfer;
63
/**
 * Swapout worker thread. These are available to take processing from the
 * main swapout thread and handle the I/O operation.
 */
typedef struct rtems_bdbuf_swapout_worker
{
  rtems_chain_node             link;     /**< The threads sit on a chain when
                                          * idle. */
  rtems_id                     id;       /**< The id of the task so we can wake
                                          * it. */
  bool                         enabled;  /**< The worker is enabled. */
  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
                                          * thread. */
} rtems_bdbuf_swapout_worker;
78
/**
 * Buffer waiters synchronization. The counter lets a waker skip the semaphore
 * flush entirely when no task is blocked (see rtems_bdbuf_wake()).
 */
typedef struct rtems_bdbuf_waiters {
  unsigned count;  /**< Number of tasks currently waiting on @a sema. */
  rtems_id sema;   /**< Semaphore the waiting tasks block on; woken by flush. */
} rtems_bdbuf_waiters;
86
/**
 * The BD buffer cache. There is a single instance of this structure
 * (bdbuf_cache); all fields are protected by the cache lock unless noted in
 * the field comment.
 */
typedef struct rtems_bdbuf_cache
{
  rtems_id            swapout;           /**< Swapout task ID */
  bool                swapout_enabled;   /**< Swapout is only running if
                                          * enabled. Set to false to kill the
                                          * swap out task. It deletes itself. */
  rtems_chain_control swapout_workers;   /**< The work threads for the swapout
                                          * task. */

  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
                                          * descriptors. */
  void*               buffers;           /**< The buffer's memory. */
  size_t              buffer_min_count;  /**< Number of minimum size buffers
                                          * that fit the buffer memory. */
  size_t              max_bds_per_group; /**< The number of BDs of minimum
                                          * buffer size that fit in a group. */
  uint32_t            flags;             /**< Configuration flags. */

  rtems_id            lock;              /**< The cache lock. It locks all
                                          * cache data, BD and lists. */
  rtems_id            sync_lock;         /**< Sync calls block writes. */
  bool                sync_active;       /**< True if a sync is active. */
  rtems_id            sync_requester;    /**< The sync requester. */
  rtems_disk_device  *sync_device;       /**< The device to sync and
                                          * BDBUF_INVALID_DEV not a device
                                          * sync. */

  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
                                          * root. There is only one. */
  rtems_chain_control lru;               /**< Least recently used list */
  rtems_chain_control modified;          /**< Modified buffers list */
  rtems_chain_control sync;              /**< Buffers to sync list */

  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
                                          * ACCESS_CACHED, ACCESS_MODIFIED or
                                          * ACCESS_EMPTY
                                          * state. */
  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
                                          * state. */
  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
                                          * available. */

  size_t              group_count;       /**< The number of groups. */
  rtems_bdbuf_group*  groups;            /**< The groups. */
  rtems_id            read_ahead_task;   /**< Read-ahead task */
  rtems_chain_control read_ahead_chain;  /**< Read-ahead request chain */
  bool                read_ahead_enabled; /**< Read-ahead enabled */

  bool                initialised;       /**< Initialised state. */
} rtems_bdbuf_cache;
140
141/**
142 * Fatal errors
143 */
144#define RTEMS_BLKDEV_FATAL_ERROR(n) \
145  (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF))
146
147#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_11      RTEMS_BLKDEV_FATAL_ERROR(1)
148#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_4       RTEMS_BLKDEV_FATAL_ERROR(2)
149#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_5       RTEMS_BLKDEV_FATAL_ERROR(3)
150#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_6       RTEMS_BLKDEV_FATAL_ERROR(4)
151#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_7       RTEMS_BLKDEV_FATAL_ERROR(5)
152#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_8       RTEMS_BLKDEV_FATAL_ERROR(6)
153#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_9       RTEMS_BLKDEV_FATAL_ERROR(7)
154#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_10      RTEMS_BLKDEV_FATAL_ERROR(8)
155#define RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM       RTEMS_BLKDEV_FATAL_ERROR(9)
156#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT       RTEMS_BLKDEV_FATAL_ERROR(10)
157
158/*
159 * The lock/unlock fatal errors occur in case the bdbuf is not initialized with
160 * rtems_bdbuf_init().  General system corruption like stack overflow etc. may
161 * also trigger these fatal errors.
162 */
163#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK     RTEMS_BLKDEV_FATAL_ERROR(11)
164#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK   RTEMS_BLKDEV_FATAL_ERROR(12)
165#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK    RTEMS_BLKDEV_FATAL_ERROR(13)
166#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK  RTEMS_BLKDEV_FATAL_ERROR(14)
167
168#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS   RTEMS_BLKDEV_FATAL_ERROR(15)
169#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2  RTEMS_BLKDEV_FATAL_ERROR(16)
170#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST   RTEMS_BLKDEV_FATAL_ERROR(17)
171#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR(18)
172#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE    RTEMS_BLKDEV_FATAL_ERROR(19)
173#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE       RTEMS_BLKDEV_FATAL_ERROR(20)
174#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM      RTEMS_BLKDEV_FATAL_ERROR(21)
175#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE  RTEMS_BLKDEV_FATAL_ERROR(22)
176#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE          RTEMS_BLKDEV_FATAL_ERROR(24)
177#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS          RTEMS_BLKDEV_FATAL_ERROR(25)
178#define RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT     RTEMS_BLKDEV_FATAL_ERROR(26)
179#define RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE       RTEMS_BLKDEV_FATAL_ERROR(27)
180#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_0       RTEMS_BLKDEV_FATAL_ERROR(28)
181#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_1       RTEMS_BLKDEV_FATAL_ERROR(29)
182#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_2       RTEMS_BLKDEV_FATAL_ERROR(30)
183#define RTEMS_BLKDEV_FATAL_BDBUF_RA_WAKE_UP    RTEMS_BLKDEV_FATAL_ERROR(31)
184
185/**
186 * The events used in this code. These should be system events rather than
187 * application events.
188 */
189#define RTEMS_BDBUF_TRANSFER_SYNC  RTEMS_EVENT_1
190#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
191#define RTEMS_BDBUF_READ_AHEAD_WAKE_UP RTEMS_EVENT_1
192
193/**
194 * Lock semaphore attributes. This is used for locking type mutexes.
195 *
196 * @warning Priority inheritance is on.
197 */
198#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
199  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
200   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
201
202/**
203 * Waiter semaphore attributes.
204 *
205 * @warning Do not configure as inherit priority. If a driver is in the driver
206 *          initialisation table this locked semaphore will have the IDLE task
207 *          as the holder and a blocking task will raise the priority of the
 *          IDLE task which can cause unusual side effects.
209 */
210#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
211  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
212   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
213
214/**
215 * Waiter timeout. Set to non-zero to find some info on a waiter that is
216 * waiting too long.
217 */
218#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
219#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
220#define RTEMS_BDBUF_WAIT_TIMEOUT \
221  (TOD_MICROSECONDS_TO_TICKS (20000000))
222#endif
223
224static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
225
226static rtems_task rtems_bdbuf_read_ahead_task(rtems_task_argument arg);
227
228/**
229 * The Buffer Descriptor cache.
230 */
231static rtems_bdbuf_cache bdbuf_cache;
232
233#if RTEMS_BDBUF_TRACE
234/**
235 * If true output the trace message.
236 */
237bool rtems_bdbuf_tracer;
238
239/**
240 * Return the number of items on the list.
241 *
242 * @param list The chain control.
243 * @return uint32_t The number of items on the list.
244 */
245uint32_t
246rtems_bdbuf_list_count (rtems_chain_control* list)
247{
248  rtems_chain_node* node = rtems_chain_first (list);
249  uint32_t          count = 0;
250  while (!rtems_chain_is_tail (list, node))
251  {
252    count++;
253    node = rtems_chain_next (node);
254  }
255  return count;
256}
257
258/**
259 * Show the usage for the bdbuf cache.
260 */
261void
262rtems_bdbuf_show_usage (void)
263{
264  uint32_t group;
265  uint32_t total = 0;
266  uint32_t val;
267
268  for (group = 0; group < bdbuf_cache.group_count; group++)
269    total += bdbuf_cache.groups[group].users;
270  printf ("bdbuf:group users=%lu", total);
271  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
272  printf (", lru=%lu", val);
273  total = val;
274  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
275  printf (", mod=%lu", val);
276  total += val;
277  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
278  printf (", sync=%lu", val);
279  total += val;
280  printf (", total=%lu\n", total);
281}
282
/**
 * Show the users for a group of a bd.
 *
 * Prints a one line trace record: context label, block number, two-letter
 * state code, group index, BD index and the group's user count. A marker is
 * appended when the user count looks suspiciously high (> 8).
 *
 * @param where A label to show the context of output.
 * @param bd The bd to show the users of.
 */
void
rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
{
  /* Two-letter codes indexed by rtems_bdbuf_buf_state enum value. */
  const char* states[] =
    { "FR", "EM", "CH", "AC", "AM", "AE", "AP", "MD", "SY", "TR", "TP" };

  /* %td matches the ptrdiff_t results of the pointer subtractions. */
  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
          where,
          bd->block, states[bd->state],
          bd->group - bdbuf_cache.groups,
          bd - bdbuf_cache.bds,
          bd->group->users,
          bd->group->users > 8 ? "<<<<<<<" : "");
}
303#else
304#define rtems_bdbuf_tracer (0)
305#define rtems_bdbuf_show_usage() ((void) 0)
306#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
307#endif
308
309/**
310 * The default maximum height of 32 allows for AVL trees having between
311 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
312 * change this compile-time constant as you wish.
313 */
314#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
315#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
316#endif
317
318static void
319rtems_bdbuf_fatal (rtems_bdbuf_buf_state state, uint32_t error)
320{
321  rtems_fatal_error_occurred ((((uint32_t) state) << 16) | error);
322}
323
324/**
325 * Searches for the node with specified dd/block.
326 *
327 * @param root pointer to the root node of the AVL-Tree
328 * @param dd disk device search key
329 * @param block block search key
330 * @retval NULL node with the specified dd/block is not found
331 * @return pointer to the node with specified dd/block
332 */
333static rtems_bdbuf_buffer *
334rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
335                        const rtems_disk_device *dd,
336                        rtems_blkdev_bnum    block)
337{
338  rtems_bdbuf_buffer* p = *root;
339
340  while ((p != NULL) && ((p->dd != dd) || (p->block != block)))
341  {
342    if (((uintptr_t) p->dd < (uintptr_t) dd)
343        || ((p->dd == dd) && (p->block < block)))
344    {
345      p = p->avl.right;
346    }
347    else
348    {
349      p = p->avl.left;
350    }
351  }
352
353  return p;
354}
355
/**
 * Inserts the specified node to the AVl-Tree.
 *
 * The tree key is the (dd, block) pair; ordering is by device pointer value
 * first, then block number. The path from the root to the insertion point is
 * recorded on an explicit stack so the rebalancing pass can walk back up
 * without parent pointers. The avl.cache field remembers which child (left
 * = -1, right = 1) was taken at each node on the way down.
 *
 * @param root pointer to the root node of the AVL-Tree
 * @param node Pointer to the node to add.
 * @retval 0 The node added successfully
 * @retval -1 An error occured (a node with the same key already exists)
 */
static int
rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
                       rtems_bdbuf_buffer*  node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  /* Explicit ancestor stack; depth bounded by the configured max height. */
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  /* Empty tree: the new node becomes the root. */
  if (p == NULL)
  {
    *root = node;
    node->avl.left = NULL;
    node->avl.right = NULL;
    node->avl.bal = 0;
    return 0;
  }

  /* Descend to the insertion point, recording the path and direction taken. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      p->avl.cache = 1;
      q = p->avl.right;
      if (q == NULL)
      {
        /* NOTE(review): the first assignment is redundant — the combined
         * assignment on the next line sets q again. Behaviour is unchanged. */
        q = node;
        p->avl.right = q = node;
        break;
      }
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      p->avl.cache = -1;
      q = p->avl.left;
      if (q == NULL)
      {
        q = node;
        p->avl.left = q;
        break;
      }
    }
    else
    {
      /* Duplicate key: node with this dd/block already present. */
      return -1;
    }

    p = q;
  }

  /* Initialise the freshly linked leaf. */
  q->avl.left = q->avl.right = NULL;
  q->avl.bal = 0;
  modified = true;
  buf_prev--;

  /* Walk back up the recorded path, updating balance factors and rotating
   * (single or double) where a node becomes unbalanced. The loop stops once
   * a subtree keeps its previous height (modified == false) or the root is
   * reached. */
  while (modified)
  {
    if (p->avl.cache == -1)
    {
      /* Insertion happened in the left subtree of p. */
      switch (p->avl.bal)
      {
        case 1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = -1;
          break;

        case -1:
          p1 = p->avl.left;
          if (p1->avl.bal == -1) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;
            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;
            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    else
    {
      /* Insertion happened in the right subtree of p (mirror case). */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = 1;
          break;

        case 1:
          p1 = p->avl.right;
          if (p1->avl.bal == 1) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;
            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;
            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    /* Re-attach the (possibly rotated) subtree to its parent, or install it
     * as the new root when the stack is exhausted. */
    q = p;
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;

      if (p->avl.cache == -1)
      {
        p->avl.left = q;
      }
      else
      {
        p->avl.right = q;
      }
    }
    else
    {
      *root = p;
      break;
    }
  };

  return 0;
}
536
537
/**
 * Removes the node from the tree.
 *
 * Classic AVL deletion using an explicit ancestor stack (no parent
 * pointers): descend to the node recording the path and the direction taken
 * in avl.cache (left = -1, right = 1), splice the node out (replacing it by
 * its in-order successor when it has two children), then walk back up the
 * stack rebalancing with single or double rotations until a subtree keeps
 * its height.
 *
 * @param root Pointer to pointer to the root node
 * @param node Pointer to the node to remove
 * @retval 0 Item removed
 * @retval -1 No such item found
 */
static int
rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
                       const rtems_bdbuf_buffer* node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  r;
  rtems_bdbuf_buffer*  s;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  /* Explicit ancestor stack; depth bounded by the configured max height. */
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  memset (buf_stack, 0, sizeof(buf_stack));

  /* Descend to the node to delete, recording the path taken. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      p->avl.cache = 1;
      p = p->avl.right;
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      p->avl.cache = -1;
      p = p->avl.left;
    }
    else
    {
      /* node found */
      break;
    }
  }

  if (p == NULL)
  {
    /* there is no such node */
    return -1;
  }

  q = p;

  /* Pop the found node; p becomes its parent (or NULL if it is the root). */
  buf_prev--;
  if (buf_prev > buf_stack)
  {
    p = *(buf_prev - 1);
  }
  else
  {
    p = NULL;
  }

  /* at this moment q - is a node to delete, p is q's parent */
  if (q->avl.right == NULL)
  {
    /* No right child: promote the left child (possibly NULL). */
    r = q->avl.left;
    if (r != NULL)
    {
      r->avl.bal = 0;
    }
    q = r;
  }
  else
  {
    rtems_bdbuf_buffer **t;

    r = q->avl.right;

    if (r->avl.left == NULL)
    {
      /* Right child has no left subtree: it replaces q directly. */
      r->avl.left = q->avl.left;
      r->avl.bal = q->avl.bal;
      r->avl.cache = 1;
      *buf_prev++ = q = r;
    }
    else
    {
      /* General case: find q's in-order successor s (leftmost node of the
       * right subtree), splice it out and put it in q's place. The stack
       * slot reserved at t is filled in afterwards. */
      t = buf_prev++;
      s = r;

      while (s->avl.left != NULL)
      {
        *buf_prev++ = r = s;
        s = r->avl.left;
        r->avl.cache = -1;
      }

      s->avl.left = q->avl.left;
      r->avl.left = s->avl.right;
      s->avl.right = q->avl.right;
      s->avl.bal = q->avl.bal;
      s->avl.cache = 1;

      *t = q = s;
    }
  }

  /* Hook the replacement subtree to q's former parent (or the root). */
  if (p != NULL)
  {
    if (p->avl.cache == -1)
    {
      p->avl.left = q;
    }
    else
    {
      p->avl.right = q;
    }
  }
  else
  {
    *root = q;
  }

  modified = true;

  /* Walk back up the recorded path rebalancing. Deletion, unlike insertion,
   * may require rotations at several levels, so the loop continues until a
   * subtree keeps its height or the root has been processed. */
  while (modified)
  {
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;
    }
    else
    {
      break;
    }

    if (p->avl.cache == -1)
    {
      /* rebalance left branch */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          break;
        case  0:
          p->avl.bal = 1;
          modified = false;
          break;

        case +1:
          p1 = p->avl.right;

          if (p1->avl.bal >= 0) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;

            if (p1->avl.bal == 0)
            {
              p1->avl.bal = -1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;

            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;

            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }
    else
    {
      /* rebalance right branch */
      switch (p->avl.bal)
      {
        case +1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = -1;
          modified = false;
          break;

        case -1:
          p1 = p->avl.left;

          if (p1->avl.bal <= 0) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            if (p1->avl.bal == 0)
            {
              p1->avl.bal = 1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;

            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;

            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }

    /* Re-attach the (possibly rotated) subtree to its parent, or install it
     * as the new root when the stack is exhausted. */
    if (buf_prev > buf_stack)
    {
      q = *(buf_prev - 1);

      if (q->avl.cache == -1)
      {
        q->avl.left = p;
      }
      else
      {
        q->avl.right = p;
      }
    }
    else
    {
      *root = p;
      break;
    }

  }

  return 0;
}
812
/**
 * Set the state of a buffer descriptor. Single point of state change so a
 * trace hook can be added here if needed.
 */
static void
rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
{
  bd->state = state;
}
818
819static rtems_blkdev_bnum
820rtems_bdbuf_media_block (const rtems_disk_device *dd, rtems_blkdev_bnum block)
821{
822  if (dd->block_to_media_block_shift >= 0)
823    return block << dd->block_to_media_block_shift;
824  else
825    /*
826     * Change the block number for the block size to the block number for the media
827     * block size. We have to use 64bit maths. There is no short cut here.
828     */
829    return (rtems_blkdev_bnum)
830      ((((uint64_t) block) * dd->block_size) / dd->media_block_size);
831}
832
833/**
834 * Lock the mutex. A single task can nest calls.
835 *
836 * @param lock The mutex to lock.
837 * @param fatal_error_code The error code if the call fails.
838 */
839static void
840rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
841{
842  rtems_status_code sc = rtems_semaphore_obtain (lock,
843                                                 RTEMS_WAIT,
844                                                 RTEMS_NO_TIMEOUT);
845  if (sc != RTEMS_SUCCESSFUL)
846    rtems_fatal_error_occurred (fatal_error_code);
847}
848
849/**
850 * Unlock the mutex.
851 *
852 * @param lock The mutex to unlock.
853 * @param fatal_error_code The error code if the call fails.
854 */
855static void
856rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
857{
858  rtems_status_code sc = rtems_semaphore_release (lock);
859  if (sc != RTEMS_SUCCESSFUL)
860    rtems_fatal_error_occurred (fatal_error_code);
861}
862
/**
 * Lock the cache. A single task can nest calls. Protects all cache data,
 * BDs and lists.
 */
static void
rtems_bdbuf_lock_cache (void)
{
  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK);
}
871
/**
 * Unlock the cache.
 */
static void
rtems_bdbuf_unlock_cache (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK);
}
880
/**
 * Lock the cache's sync. A single task can nest calls. While held, writers
 * to the sync device are blocked.
 */
static void
rtems_bdbuf_lock_sync (void)
{
  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
}
889
/**
 * Unlock the cache's sync lock. Any blocked writers are woken.
 */
static void
rtems_bdbuf_unlock_sync (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
                      RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
}
899
/**
 * Take a reference on the buffer's group. Caller must hold the cache lock.
 */
static void
rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
{
  ++bd->group->users;
}
905
/**
 * Drop a reference on the buffer's group. Caller must hold the cache lock.
 */
static void
rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
{
  --bd->group->users;
}
911
912static rtems_mode
913rtems_bdbuf_disable_preemption (void)
914{
915  rtems_status_code sc = RTEMS_SUCCESSFUL;
916  rtems_mode prev_mode = 0;
917
918  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
919  if (sc != RTEMS_SUCCESSFUL)
920    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS);
921
922  return prev_mode;
923}
924
925static void
926rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
927{
928  rtems_status_code sc = RTEMS_SUCCESSFUL;
929
930  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
931  if (sc != RTEMS_SUCCESSFUL)
932    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST);
933}
934
/**
 * Wait until woken. Semaphores are used so a number of tasks can wait and can
 * be woken at once. Task events would require we maintain a list of tasks to
 * be woken and this would require storage and we do not know the number of
 * tasks that could be waiting.
 *
 * While we have the cache locked we can try and claim the semaphore and
 * therefore know when we release the lock to the cache we will block until the
 * semaphore is released. This may even happen before we get to block.
 *
 * A counter is used to save the release call when no one is waiting.
 *
 * The function assumes the cache is locked on entry and it will be locked on
 * exit.
 */
static void
rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
{
  rtems_status_code sc;
  rtems_mode        prev_mode;

  /*
   * Indicate we are waiting.
   */
  ++waiters->count;

  /*
   * Disable preemption then unlock the cache and block.  There is no POSIX
   * condition variable in the core API so this is a work around.
   *
   * The issue is a task could preempt after the cache is unlocked because it is
   * blocking or just hits that window, and before this task has blocked on the
   * semaphore. If the preempting task flushes the queue this task will not see
   * the flush and may block for ever or until another transaction flushes this
   * semaphore.
   */
  prev_mode = rtems_bdbuf_disable_preemption ();

  /*
   * Unlock the cache, wait, and lock the cache when we return.
   */
  rtems_bdbuf_unlock_cache ();

  sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);

  if (sc == RTEMS_TIMEOUT)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO);

  /*
   * Waiters are woken by a semaphore flush, which completes the obtain with
   * RTEMS_UNSATISFIED; any other status here means the cache is corrupt.
   * NOTE(review): this implies a plain release of the waiter semaphore (which
   * would yield RTEMS_SUCCESSFUL) is never expected — confirm against the
   * semaphore's creation/usage elsewhere in the file.
   */
  if (sc != RTEMS_UNSATISFIED)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2);

  rtems_bdbuf_lock_cache ();

  rtems_bdbuf_restore_preemption (prev_mode);

  --waiters->count;
}
992
/**
 * Wait on the given waiter queue for the buffer. The buffer's group and
 * waiter counts are held across the wait so the buffer cannot be recycled
 * while this task is blocked.
 */
static void
rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
{
  rtems_bdbuf_group_obtain (bd);
  ++bd->waiters;
  rtems_bdbuf_anonymous_wait (waiters);
  --bd->waiters;
  rtems_bdbuf_group_release (bd);
}
1002
1003/**
1004 * Wake a blocked resource. The resource has a counter that lets us know if
1005 * there are any waiters.
1006 */
1007static void
1008rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters)
1009{
1010  rtems_status_code sc = RTEMS_SUCCESSFUL;
1011
1012  if (waiters->count > 0)
1013  {
1014    sc = rtems_semaphore_flush (waiters->sema);
1015    if (sc != RTEMS_SUCCESSFUL)
1016      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE);
1017  }
1018}
1019
1020static void
1021rtems_bdbuf_wake_swapper (void)
1022{
1023  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1024                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1025  if (sc != RTEMS_SUCCESSFUL)
1026    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
1027}
1028
1029static bool
1030rtems_bdbuf_has_buffer_waiters (void)
1031{
1032  return bdbuf_cache.buffer_waiters.count;
1033}
1034
/**
 * Remove the buffer from the AVL lookup tree. A failure to remove means the
 * tree and the buffer state disagree, which is fatal.
 */
static void
rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
{
  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
    rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM);
}
1041
/**
 * Unlink the buffer from the LRU list and, when it is CACHED, from the AVL
 * tree as well. FREE buffers are only on the LRU list; any other state on
 * the LRU list is a fatal inconsistency.
 */
static void
rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
{
  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_FREE:
      break;
    case RTEMS_BDBUF_STATE_CACHED:
      rtems_bdbuf_remove_from_tree (bd);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_10);
  }

  rtems_chain_extract_unprotected (&bd->link);
}
1058
/**
 * Mark the buffer FREE and prepend it to the LRU list so it is reused first.
 */
static void
rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
  rtems_chain_prepend_unprotected (&bdbuf_cache.lru, &bd->link);
}
1065
/**
 * Mark the buffer EMPTY (allocated but holding no valid data).
 */
static void
rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
}
1071
/**
 * Mark the buffer CACHED and append it to the LRU list (most recently used
 * end), keeping its data available for future hits.
 */
static void
rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
  rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
}
1078
/**
 * Discard the buffer's contents. The buffer becomes EMPTY; if no task is
 * waiting on it, it is also removed from the lookup tree and returned to the
 * free end of the LRU list. If there are waiters the buffer stays in the
 * tree so they can still find it.
 */
static void
rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_make_empty (bd);

  if (bd->waiters == 0)
  {
    rtems_bdbuf_remove_from_tree (bd);
    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
  }
}
1090
/**
 * Release an accessed buffer as modified: put it on the modified list so the
 * swapout task will eventually write it. If a sync is active for this
 * buffer's device the call first drops the cache lock and blocks on the sync
 * lock, so the modification cannot race the in-progress sync.
 */
static void
rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
{
  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dd)
  {
    rtems_bdbuf_unlock_cache ();

    /*
     * Wait for the sync lock.
     */
    rtems_bdbuf_lock_sync ();

    rtems_bdbuf_unlock_sync ();
    rtems_bdbuf_lock_cache ();
  }

  /*
   * Only the first modified release sets the timer and any further user
   * accesses do not change the timer value which should move down. This
   * assumes the user's hold of the buffer is much less than the time on the
   * modified list. Resetting the timer on each access which could result in a
   * buffer never getting to 0 and never being forced onto disk. This raises a
   * difficult question. Is a snapshot of a block that is changing better than
   * nothing being written? We have tended to think we should hold changes for
   * only a specific period of time even if still changing and get onto disk
   * and letting the file system try and recover this position if it can.
   */
  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
    bd->hold_timer = bdbuf_config.swap_block_hold;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
  rtems_chain_append_unprotected (&bdbuf_cache.modified, &bd->link);

  /* Prefer waking tasks blocked on this buffer; otherwise, if tasks are
   * starved of buffers, kick the swapout task to free some up. */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else if (rtems_bdbuf_has_buffer_waiters ())
    rtems_bdbuf_wake_swapper ();
}
1130
1131static void
1132rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
1133{
1134  rtems_bdbuf_group_release (bd);
1135  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
1136
1137  if (bd->waiters)
1138    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1139  else
1140    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1141}
1142
1143/**
1144 * Compute the number of BDs per group for a given buffer size.
1145 *
1146 * @param size The buffer size. It can be any size and we scale up.
1147 */
1148static size_t
1149rtems_bdbuf_bds_per_group (size_t size)
1150{
1151  size_t bufs_per_size;
1152  size_t bds_per_size;
1153
1154  if (size > bdbuf_config.buffer_max)
1155    return 0;
1156
1157  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1158
1159  for (bds_per_size = 1;
1160       bds_per_size < bufs_per_size;
1161       bds_per_size <<= 1)
1162    ;
1163
1164  return bdbuf_cache.max_bds_per_group / bds_per_size;
1165}
1166
1167static void
1168rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
1169{
1170  rtems_bdbuf_group_release (bd);
1171  rtems_bdbuf_discard_buffer (bd);
1172
1173  if (bd->waiters)
1174    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1175  else
1176    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1177}
1178
/**
 * Reallocate a group. The BDs currently allocated in the group are removed
 * from the AVL tree and any lists then the new BDs are prepended to the LRU
 * list of the cache.
 *
 * @param group The group to reallocate.
 * @param new_bds_per_group The new count of BDs per group.
 * @return A buffer of this group.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
{
  rtems_bdbuf_buffer* bd;
  size_t              b;
  size_t              bufs_per_bd;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
            group - bdbuf_cache.groups, group->bds_per_group,
            new_bds_per_group);

  /* Stride (in BD slots) between the BDs currently active in this group. */
  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;

  for (b = 0, bd = group->bdbuf;
       b < group->bds_per_group;
       b++, bd += bufs_per_bd)
    rtems_bdbuf_remove_from_tree_and_lru_list (bd);

  /* Switch the group to the new layout and recompute the stride. */
  group->bds_per_group = new_bds_per_group;
  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;

  /*
   * Start at b = 1: the group's first BD is returned to the caller and must
   * not be placed on the LRU free list.
   */
  for (b = 1, bd = group->bdbuf + bufs_per_bd;
       b < group->bds_per_group;
       b++, bd += bufs_per_bd)
    rtems_bdbuf_make_free_and_add_to_lru_list (bd);

  /* New free buffers appeared, so give waiters a chance at them. */
  if (b > 1)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  return group->bdbuf;
}
1220
1221static void
1222rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
1223                                rtems_disk_device  *dd,
1224                                rtems_blkdev_bnum   block)
1225{
1226  bd->dd        = dd ;
1227  bd->block     = block;
1228  bd->avl.left  = NULL;
1229  bd->avl.right = NULL;
1230  bd->waiters   = 0;
1231
1232  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1233    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE);
1234
1235  rtems_bdbuf_make_empty (bd);
1236}
1237
/*
 * Scan the LRU list for a buffer that can be recycled for the given device
 * and block.  A buffer qualifies if it has no waiters and either already
 * has the right group geometry or belongs to an idle group that can be
 * reallocated.  Returns NULL if no buffer can currently be recycled.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_from_lru_list (rtems_disk_device *dd,
                                      rtems_blkdev_bnum  block)
{
  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);

  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
  {
    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
    rtems_bdbuf_buffer *empty_bd = NULL;

    if (rtems_bdbuf_tracer)
      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
              bd - bdbuf_cache.bds,
              bd->group - bdbuf_cache.groups, bd->group->users,
              bd->group->bds_per_group, dd->bds_per_group);

    /*
     * If nobody waits for this BD, we may recycle it.
     */
    if (bd->waiters == 0)
    {
      if (bd->group->bds_per_group == dd->bds_per_group)
      {
        /* Geometry matches: take this buffer directly. */
        rtems_bdbuf_remove_from_tree_and_lru_list (bd);

        empty_bd = bd;
      }
      else if (bd->group->users == 0)
        /* Idle group with the wrong geometry: reshape it for this device. */
        empty_bd = rtems_bdbuf_group_realloc (bd->group, dd->bds_per_group);
    }

    if (empty_bd != NULL)
    {
      rtems_bdbuf_setup_empty_buffer (empty_bd, dd, block);

      return empty_bd;
    }

    node = rtems_chain_next (node);
  }

  return NULL;
}
1282
1283static rtems_status_code
1284rtems_bdbuf_create_task(
1285  rtems_name name,
1286  rtems_task_priority priority,
1287  rtems_task_priority default_priority,
1288  rtems_task_entry entry,
1289  rtems_task_argument arg,
1290  rtems_id *id
1291)
1292{
1293  rtems_status_code sc;
1294  size_t stack_size = bdbuf_config.task_stack_size ?
1295    bdbuf_config.task_stack_size : RTEMS_BDBUF_TASK_STACK_SIZE_DEFAULT;
1296
1297  priority = priority != 0 ? priority : default_priority;
1298
1299  sc = rtems_task_create (name,
1300                          priority,
1301                          stack_size,
1302                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1303                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1304                          id);
1305
1306  if (sc == RTEMS_SUCCESSFUL)
1307    sc = rtems_task_start (*id, entry, arg);
1308
1309  return sc;
1310}
1311
/**
 * Initialise the cache.
 *
 * Creates the cache locks and waiter semaphores, allocates the buffer
 * descriptors, groups and buffer memory, threads the buffers onto the LRU
 * list and starts the swapout (and optionally read-ahead) task.  On any
 * failure all resources created so far are torn down via the error label.
 *
 * @return rtems_status_code The initialisation status.
 */
rtems_status_code
rtems_bdbuf_init (void)
{
  rtems_bdbuf_group*  group;
  rtems_bdbuf_buffer* bd;
  uint8_t*            buffer;
  size_t              b;
  size_t              cache_aligment;
  rtems_status_code   sc;
  rtems_mode          prev_mode;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:init\n");

  if (rtems_interrupt_is_in_progress())
    return RTEMS_CALLED_FROM_ISR;

  /*
   * Check the configuration table values.
   */
  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
    return RTEMS_INVALID_NUMBER;

  /*
   * We use a special variable to manage the initialisation incase we have
   * competing threads doing this. You may get errors if another thread
   * makes a call and we have not finished initialisation.
   */
  prev_mode = rtems_bdbuf_disable_preemption ();
  if (bdbuf_cache.initialised)
  {
    rtems_bdbuf_restore_preemption (prev_mode);
    return RTEMS_RESOURCE_IN_USE;
  }

  /*
   * Zeroing the whole cache here also makes the error-path cleanup safe:
   * unset handles are 0/NULL and can be deleted/freed unconditionally.
   */
  memset(&bdbuf_cache, 0, sizeof(bdbuf_cache));
  bdbuf_cache.initialised = true;
  rtems_bdbuf_restore_preemption (prev_mode);

  /*
   * For unspecified cache alignments we use the CPU alignment.  NOTE:
   * cache_aligment is a size_t, so "<= 0" only ever catches 0; with the
   * hard-coded 32 the fallback is currently dead code.
   */
  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
  if (cache_aligment <= 0)
    cache_aligment = CPU_ALIGNMENT;

  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;

  rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
  rtems_chain_initialize_empty (&bdbuf_cache.lru);
  rtems_chain_initialize_empty (&bdbuf_cache.modified);
  rtems_chain_initialize_empty (&bdbuf_cache.sync);
  rtems_chain_initialize_empty (&bdbuf_cache.read_ahead_chain);

  /*
   * Create the locks for the cache.
   */
  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
                               &bdbuf_cache.lock);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  /* Hold the cache lock for the rest of initialisation. */
  rtems_bdbuf_lock_cache ();

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
                               &bdbuf_cache.sync_lock);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.access_waiters.sema);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.transfer_waiters.sema);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'b'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.buffer_waiters.sema);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  /*
   * Compute the various number of elements in the cache.
   */
  bdbuf_cache.buffer_min_count =
    bdbuf_config.size / bdbuf_config.buffer_min;
  bdbuf_cache.max_bds_per_group =
    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
  bdbuf_cache.group_count =
    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;

  /*
   * Allocate the memory for the buffer descriptors.
   */
  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
                            bdbuf_cache.buffer_min_count);
  if (!bdbuf_cache.bds)
    goto error;

  /*
   * Allocate the memory for the group descriptors.
   */
  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
                               bdbuf_cache.group_count);
  if (!bdbuf_cache.groups)
    goto error;

  /*
   * Allocate memory for buffer memory. The buffer memory will be cache
   * aligned. It is possible to free the memory allocated by rtems_memalign()
   * with free(). Returns 0 if allocated.
   */
  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
                      cache_aligment,
                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
    goto error;

  /*
   * The cache is empty after opening so we need to add all the buffers to it
   * and initialise the groups.
   */
  for (b = 0, group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
       b < bdbuf_cache.buffer_min_count;
       b++, bd++, buffer += bdbuf_config.buffer_min)
  {
    bd->dd    = BDBUF_INVALID_DEV;
    bd->group  = group;
    bd->buffer = buffer;

    rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);

    /* Advance to the next group after max_bds_per_group buffers. */
    if ((b % bdbuf_cache.max_bds_per_group) ==
        (bdbuf_cache.max_bds_per_group - 1))
      group++;
  }

  for (b = 0,
         group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds;
       b < bdbuf_cache.group_count;
       b++,
         group++,
         bd += bdbuf_cache.max_bds_per_group)
  {
    group->bds_per_group = bdbuf_cache.max_bds_per_group;
    group->bdbuf = bd;
  }

  /*
   * Create and start swapout task. This task will create and manage the worker
   * threads.
   */
  bdbuf_cache.swapout_enabled = true;

  sc = rtems_bdbuf_create_task (rtems_build_name('B', 'S', 'W', 'P'),
                                bdbuf_config.swapout_priority,
                                RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
                                rtems_bdbuf_swapout_task,
                                0,
                                &bdbuf_cache.swapout);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  if (bdbuf_config.max_read_ahead_blocks > 0)
  {
    bdbuf_cache.read_ahead_enabled = true;
    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'R', 'D', 'A'),
                                  bdbuf_config.read_ahead_priority,
                                  RTEMS_BDBUF_READ_AHEAD_TASK_PRIORITY_DEFAULT,
                                  rtems_bdbuf_read_ahead_task,
                                  0,
                                  &bdbuf_cache.read_ahead_task);
    if (sc != RTEMS_SUCCESSFUL)
      goto error;
  }

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;

error:

  /*
   * Tear down in reverse order.  All handles/pointers not yet created are
   * still zero from the memset above, so the deletes/frees are safe.
   */
  if (bdbuf_cache.read_ahead_task != 0)
    rtems_task_delete (bdbuf_cache.read_ahead_task);

  if (bdbuf_cache.swapout != 0)
    rtems_task_delete (bdbuf_cache.swapout);

  free (bdbuf_cache.buffers);
  free (bdbuf_cache.groups);
  free (bdbuf_cache.bds);

  rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema);
  rtems_semaphore_delete (bdbuf_cache.access_waiters.sema);
  rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema);
  rtems_semaphore_delete (bdbuf_cache.sync_lock);

  if (bdbuf_cache.lock != 0)
  {
    rtems_bdbuf_unlock_cache ();
    rtems_semaphore_delete (bdbuf_cache.lock);
  }

  bdbuf_cache.initialised = false;

  return RTEMS_UNSATISFIED;
}
1535
1536static void
1537rtems_bdbuf_wait_for_event (rtems_event_set event)
1538{
1539  rtems_status_code sc = RTEMS_SUCCESSFUL;
1540  rtems_event_set   out = 0;
1541
1542  sc = rtems_event_receive (event,
1543                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1544                            RTEMS_NO_TIMEOUT,
1545                            &out);
1546
1547  if (sc != RTEMS_SUCCESSFUL || out != event)
1548    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT);
1549}
1550
/*
 * Block until the buffer can be handed to an accessor.  On return the
 * buffer is in the modified, cached or empty state; cached and modified
 * buffers are extracted from their list because the caller takes them over.
 */
static void
rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* A modified buffer holds a group reference; drop it first. */
        rtems_bdbuf_group_release (bd);
        /* Fall through */
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_chain_extract_unprotected (&bd->link);
        /* Fall through */
      case RTEMS_BDBUF_STATE_EMPTY:
        return;
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        /* Another task holds the buffer; wait until it releases it. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        /* Buffer is queued for or undergoing I/O; wait for completion. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_7);
    }
  }
}
1582
/*
 * Move a modified buffer onto the sync list and wake the swapper so it is
 * written to disk out of turn.
 */
static void
rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
  rtems_chain_extract_unprotected (&bd->link);
  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
  rtems_bdbuf_wake_swapper ();
}
1591
/**
 * @brief Waits until the buffer is ready for recycling.
 *
 * Modified buffers are pushed onto the sync list first; buffers in access
 * or transfer states are waited on until they settle.
 *
 * @retval @c true Buffer is valid and may be recycled.
 * @retval @c false Buffer is invalid and has to searched again.
 */
static bool
rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_FREE:
        return true;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* Force the dirty data out before the buffer can be recycled. */
        rtems_bdbuf_request_sync_for_modified_buffer (bd);
        break;
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
        if (bd->waiters == 0)
          return true;
        else
        {
          /*
           * It is essential that we wait here without a special wait count and
           * without the group in use.  Otherwise we could trigger a wait ping
           * pong with another recycle waiter.  The state of the buffer is
           * arbitrary afterwards.
           */
          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
          return false;
        }
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_8);
    }
  }
}
1641
/*
 * Block until a buffer queued for sync has left the sync/transfer states.
 * Returns as soon as the buffer reaches any settled or access state.
 */
static void
rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        return;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_9);
    }
  }
}
1667
/*
 * Wait anonymously until any buffer becomes available.  If modified buffers
 * exist the swapper is kicked first so they can be written out and freed.
 */
static void
rtems_bdbuf_wait_for_buffer (void)
{
  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
    rtems_bdbuf_wake_swapper ();

  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
}
1676
/*
 * Queue an accessed buffer for synchronous write-out and wait until the
 * transfer has completed.  Afterwards an unwaited cached or empty buffer is
 * made available to buffer waiters (an empty one is freed first).
 */
static void
rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);

  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_wait_for_sync_done (bd);

  /*
   * We may have created a cached or empty buffer which may be recycled.
   */
  if (bd->waiters == 0
        && (bd->state == RTEMS_BDBUF_STATE_CACHED
          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
  {
    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
    {
      rtems_bdbuf_remove_from_tree (bd);
      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
    }
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
  }
}
1705
1706static rtems_bdbuf_buffer *
1707rtems_bdbuf_get_buffer_for_read_ahead (rtems_disk_device *dd,
1708                                       rtems_blkdev_bnum  block)
1709{
1710  rtems_bdbuf_buffer *bd = NULL;
1711
1712  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1713
1714  if (bd == NULL)
1715  {
1716    bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);
1717
1718    if (bd != NULL)
1719      rtems_bdbuf_group_obtain (bd);
1720  }
1721  else
1722    /*
1723     * The buffer is in the cache.  So it is already available or in use, and
1724     * thus no need for a read ahead.
1725     */
1726    bd = NULL;
1727
1728  return bd;
1729}
1730
/*
 * Obtain the buffer for the given device and block for exclusive access,
 * retrying until one is available.  A cache hit with the wrong group
 * geometry is recycled and the search restarted; a miss recycles a buffer
 * from the LRU list or waits for one to appear.  On return the buffer has
 * been waited on and a group reference is held.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_for_access (rtems_disk_device *dd,
                                   rtems_blkdev_bnum  block)
{
  rtems_bdbuf_buffer *bd = NULL;

  do
  {
    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);

    if (bd != NULL)
    {
      if (bd->group->bds_per_group != dd->bds_per_group)
      {
        /*
         * Cache hit, but the buffer's group geometry does not match this
         * device; recycle it and search again.
         */
        if (rtems_bdbuf_wait_for_recycle (bd))
        {
          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
        }
        bd = NULL;
      }
    }
    else
    {
      bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);

      if (bd == NULL)
        rtems_bdbuf_wait_for_buffer ();
    }
  }
  while (bd == NULL);

  rtems_bdbuf_wait_for_access (bd);
  rtems_bdbuf_group_obtain (bd);

  return bd;
}
1769
1770static rtems_status_code
1771rtems_bdbuf_get_media_block (const rtems_disk_device *dd,
1772                             rtems_blkdev_bnum        block,
1773                             rtems_blkdev_bnum       *media_block_ptr)
1774{
1775  rtems_status_code sc = RTEMS_SUCCESSFUL;
1776
1777  if (block < dd->block_count)
1778  {
1779    /*
1780     * Compute the media block number. Drivers work with media block number not
1781     * the block number a BD may have as this depends on the block size set by
1782     * the user.
1783     */
1784    *media_block_ptr = rtems_bdbuf_media_block (dd, block) + dd->start;
1785  }
1786  else
1787  {
1788    sc = RTEMS_INVALID_ID;
1789  }
1790
1791  return sc;
1792}
1793
/*
 * Get a buffer for the given device and block without reading it from disk.
 * On success the buffer is returned in an access state; the caller owns it
 * until it is released.  An out-of-range block yields RTEMS_INVALID_ID and
 * a NULL buffer.
 */
rtems_status_code
rtems_bdbuf_get (rtems_disk_device   *dd,
                 rtems_blkdev_bnum    block,
                 rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code   sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block;

  rtems_bdbuf_lock_cache ();

  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    /*
     * Print the block index relative to the physical disk.
     */
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);

    /* Map the settled state to the matching access state. */
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /*
         * To get a modified buffer could be considered a bug in the caller
         * because you should not be getting an already modified buffer but
         * user may have modified a byte in a block then decided to seek the
         * start and write the whole block and the file system will have no
         * record of this so just gets the block to fill.
         */
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_2);
        break;
    }

    if (rtems_bdbuf_tracer)
    {
      rtems_bdbuf_show_users ("get", bd);
      rtems_bdbuf_show_usage ();
    }
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
1853
1854/**
1855 * Call back handler called by the low level driver when the transfer has
1856 * completed. This function may be invoked from interrupt handler.
1857 *
1858 * @param arg Arbitrary argument specified in block device request
1859 *            structure (in this case - pointer to the appropriate
1860 *            block device request structure).
1861 * @param status I/O completion status
1862 */
1863static void
1864rtems_bdbuf_transfer_done (void* arg, rtems_status_code status)
1865{
1866  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
1867
1868  req->status = status;
1869
1870  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
1871}
1872
/*
 * Submit a transfer request to the disk driver and wait for its completion,
 * then update the device statistics and settle every buffer of the request:
 * successfully transferred buffers become cached, all others are discarded.
 * The cache lock state on entry is described by cache_locked; the cache is
 * always locked while the buffers are settled and left in the same lock
 * state the caller had on entry.
 *
 * @retval RTEMS_SUCCESSFUL Transfer completed.
 * @retval RTEMS_UNSATISFIED Driver reported this status; passed through.
 * @retval RTEMS_IO_ERROR Any other driver or ioctl failure.
 */
static rtems_status_code
rtems_bdbuf_execute_transfer_request (rtems_disk_device    *dd,
                                      rtems_blkdev_request *req,
                                      bool                  cache_locked)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  int result = 0;
  uint32_t transfer_index = 0;
  bool wake_transfer_waiters = false;
  bool wake_buffer_waiters = false;

  /* The driver call and the wait must happen without the cache lock held. */
  if (cache_locked)
    rtems_bdbuf_unlock_cache ();

  result = dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);

  if (result == 0)
  {
    /* Block until rtems_bdbuf_transfer_done() signals completion. */
    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
    sc = req->status;
  }
  else
    sc = RTEMS_IO_ERROR;

  rtems_bdbuf_lock_cache ();

  /* Statistics */
  if (req->req == RTEMS_BLKDEV_REQ_READ)
  {
    dd->stats.read_blocks += req->bufnum;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.read_errors;
  }
  else
  {
    dd->stats.write_blocks += req->bufnum;
    ++dd->stats.write_transfers;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.write_errors;
  }

  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
  {
    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
    bool waiters = bd->waiters;

    if (waiters)
      wake_transfer_waiters = true;
    else
      wake_buffer_waiters = true;

    rtems_bdbuf_group_release (bd);

    /*
     * Only buffers still in plain TRANSFER state become cached; purged or
     * failed buffers are discarded.
     */
    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
    else
      rtems_bdbuf_discard_buffer (bd);

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("transfer", bd);
  }

  if (wake_transfer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);

  if (wake_buffer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  if (!cache_locked)
    rtems_bdbuf_unlock_cache ();

  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
    return sc;
  else
    return RTEMS_IO_ERROR;
}
1949
/*
 * Build and execute a read request starting at the given buffer's block.
 * Up to transfer_count consecutive blocks are read; additional blocks are
 * added only while free buffers can be obtained for read ahead.  Must be
 * called with the cache locked.
 */
static rtems_status_code
rtems_bdbuf_execute_read_request (rtems_disk_device  *dd,
                                  rtems_bdbuf_buffer *bd,
                                  uint32_t            transfer_count)
{
  rtems_blkdev_request *req = NULL;
  rtems_blkdev_bnum media_block = bd->block;
  uint32_t media_blocks_per_block = dd->media_blocks_per_block;
  uint32_t block_size = dd->block_size;
  uint32_t transfer_index = 1;

  /*
   * TODO: This type of request structure is wrong and should be removed.
   * NOTE(review): alloca'd storage lives only for this call; the driver
   * completion must arrive before return — which the synchronous wait in
   * rtems_bdbuf_execute_transfer_request() ensures.
   */
#define bdbuf_alloc(size) __builtin_alloca (size)

  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
                     sizeof (rtems_blkdev_sg_buffer) * transfer_count);

  req->req = RTEMS_BLKDEV_REQ_READ;
  req->req_done = rtems_bdbuf_transfer_done;
  req->done_arg = req;
  req->io_task = rtems_task_self ();
  req->status = RTEMS_RESOURCE_IN_USE;
  req->bufnum = 0;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

  /* The first scatter/gather entry is the caller's buffer. */
  req->bufs [0].user   = bd;
  req->bufs [0].block  = media_block;
  req->bufs [0].length = block_size;
  req->bufs [0].buffer = bd->buffer;

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_users ("read", bd);

  /* Extend the request with read-ahead blocks while buffers are free. */
  while (transfer_index < transfer_count)
  {
    media_block += media_blocks_per_block;

    bd = rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

    if (bd == NULL)
      break;

    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

    req->bufs [transfer_index].user   = bd;
    req->bufs [transfer_index].block  = media_block;
    req->bufs [transfer_index].length = block_size;
    req->bufs [transfer_index].buffer = bd->buffer;

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("read", bd);

    ++transfer_index;
  }

  req->bufnum = transfer_index;

  return rtems_bdbuf_execute_transfer_request (dd, req, true);
}
2012
/*
 * True if the device is currently enqueued on the read-ahead chain.
 */
static bool
rtems_bdbuf_is_read_ahead_active (const rtems_disk_device *dd)
{
  return !rtems_chain_is_node_off_chain (&dd->read_ahead.node);
}
2018
2019static void
2020rtems_bdbuf_read_ahead_cancel (rtems_disk_device *dd)
2021{
2022  if (rtems_bdbuf_is_read_ahead_active (dd))
2023  {
2024    rtems_chain_extract_unprotected (&dd->read_ahead.node);
2025    rtems_chain_set_off_chain (&dd->read_ahead.node);
2026  }
2027}
2028
/*
 * Cancel any pending read ahead for the device and clear its trigger so no
 * new read ahead is started until a trigger is set again.
 */
static void
rtems_bdbuf_read_ahead_reset (rtems_disk_device *dd)
{
  rtems_bdbuf_read_ahead_cancel (dd);
  dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
}
2035
/*
 * If the accessed block hits the device's read-ahead trigger, enqueue the
 * device on the read-ahead chain and, when the chain was empty, wake the
 * read-ahead task.  Does nothing when the task is not running or a read
 * ahead for this device is already pending.
 */
static void
rtems_bdbuf_check_read_ahead_trigger (rtems_disk_device *dd,
                                      rtems_blkdev_bnum  block)
{
  if (bdbuf_cache.read_ahead_task != 0
      && dd->read_ahead.trigger == block
      && !rtems_bdbuf_is_read_ahead_active (dd))
  {
    rtems_status_code sc;
    rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;

    /* Only the transition from empty needs a wake up; the task drains the
     * whole chain once running. */
    if (rtems_chain_is_empty (chain))
    {
      sc = rtems_event_send (bdbuf_cache.read_ahead_task,
                             RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
      if (sc != RTEMS_SUCCESSFUL)
        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RA_WAKE_UP);
    }

    rtems_chain_append_unprotected (chain, &dd->read_ahead.node);
  }
}
2058
2059static void
2060rtems_bdbuf_set_read_ahead_trigger (rtems_disk_device *dd,
2061                                    rtems_blkdev_bnum  block)
2062{
2063  if (dd->read_ahead.trigger != block)
2064  {
2065    rtems_bdbuf_read_ahead_cancel (dd);
2066    dd->read_ahead.trigger = block + 1;
2067    dd->read_ahead.next = block + 2;
2068  }
2069}
2070
/**
 * Obtain a buffer for the given logical block with valid data. A buffer in
 * the CACHED or MODIFIED state is handed out directly (a read hit); an EMPTY
 * buffer is filled by a single-block read from the media first (a read
 * miss). On a miss the read-ahead trigger is armed, and on every access the
 * trigger is checked so sequential reads start read-ahead transfers.
 *
 * @param dd The disk device.
 * @param block Logical block number relative to the device start.
 * @param bd_ptr Receives the buffer descriptor, or NULL on failure.
 *
 * @return RTEMS_SUCCESSFUL or the status of the media-block translation or
 *         of the read transfer.
 */
rtems_status_code
rtems_bdbuf_read (rtems_disk_device   *dd,
                  rtems_blkdev_bnum    block,
                  rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code     sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer   *bd = NULL;
  rtems_blkdev_bnum     media_block;

  rtems_bdbuf_lock_cache ();

  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block + dd->start, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        /* Read hit: data already valid. */
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* Read hit on dirty data: keep the modified state on release. */
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        /* Read miss: fetch the block from the media. */
        ++dd->stats.read_misses;
        rtems_bdbuf_set_read_ahead_trigger (dd, block);
        sc = rtems_bdbuf_execute_read_request (dd, bd, 1);
        if (sc == RTEMS_SUCCESSFUL)
        {
          rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
          rtems_chain_extract_unprotected (&bd->link);
          rtems_bdbuf_group_obtain (bd);
        }
        else
        {
          /* The transfer failed: do not hand out a buffer. */
          bd = NULL;
        }
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_4);
        break;
    }

    rtems_bdbuf_check_read_ahead_trigger (dd, block);
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
2129
/**
 * Validate the buffer descriptor passed in by an API user, trace the
 * operation, and on success lock the cache.
 *
 * @param bd Buffer descriptor supplied by the caller, may be NULL.
 * @param kind Name of the calling operation, used for tracing only.
 *
 * @retval RTEMS_INVALID_ADDRESS @a bd was NULL; the cache is not locked.
 * @retval RTEMS_SUCCESSFUL The cache lock is held on return.
 */
static rtems_status_code
rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
{
  if (bd == NULL)
    return RTEMS_INVALID_ADDRESS;
  if (rtems_bdbuf_tracer)
  {
    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
    rtems_bdbuf_show_users (kind, bd);
  }
  rtems_bdbuf_lock_cache();

  return RTEMS_SUCCESSFUL;
}
2144
2145rtems_status_code
2146rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2147{
2148  rtems_status_code sc = RTEMS_SUCCESSFUL;
2149
2150  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2151  if (sc != RTEMS_SUCCESSFUL)
2152    return sc;
2153
2154  switch (bd->state)
2155  {
2156    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2157      rtems_bdbuf_add_to_lru_list_after_access (bd);
2158      break;
2159    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2160    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2161      rtems_bdbuf_discard_buffer_after_access (bd);
2162      break;
2163    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2164      rtems_bdbuf_add_to_modified_list_after_access (bd);
2165      break;
2166    default:
2167      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_0);
2168      break;
2169  }
2170
2171  if (rtems_bdbuf_tracer)
2172    rtems_bdbuf_show_usage ();
2173
2174  rtems_bdbuf_unlock_cache ();
2175
2176  return RTEMS_SUCCESSFUL;
2177}
2178
2179rtems_status_code
2180rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2181{
2182  rtems_status_code sc = RTEMS_SUCCESSFUL;
2183
2184  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2185  if (sc != RTEMS_SUCCESSFUL)
2186    return sc;
2187
2188  switch (bd->state)
2189  {
2190    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2191    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2192    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2193      rtems_bdbuf_add_to_modified_list_after_access (bd);
2194      break;
2195    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2196      rtems_bdbuf_discard_buffer_after_access (bd);
2197      break;
2198    default:
2199      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_6);
2200      break;
2201  }
2202
2203  if (rtems_bdbuf_tracer)
2204    rtems_bdbuf_show_usage ();
2205
2206  rtems_bdbuf_unlock_cache ();
2207
2208  return RTEMS_SUCCESSFUL;
2209}
2210
2211rtems_status_code
2212rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
2213{
2214  rtems_status_code sc = RTEMS_SUCCESSFUL;
2215
2216  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2217  if (sc != RTEMS_SUCCESSFUL)
2218    return sc;
2219
2220  switch (bd->state)
2221  {
2222    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2223    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2224    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2225      rtems_bdbuf_sync_after_access (bd);
2226      break;
2227    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2228      rtems_bdbuf_discard_buffer_after_access (bd);
2229      break;
2230    default:
2231      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_5);
2232      break;
2233  }
2234
2235  if (rtems_bdbuf_tracer)
2236    rtems_bdbuf_show_usage ();
2237
2238  rtems_bdbuf_unlock_cache ();
2239
2240  return RTEMS_SUCCESSFUL;
2241}
2242
/**
 * Synchronize all modified buffers of a device with the media. Blocks the
 * caller until the swapout task signals completion via the
 * RTEMS_BDBUF_TRANSFER_SYNC event.
 *
 * @param dd The disk device to sync.
 *
 * @retval RTEMS_SUCCESSFUL Always, once the sync has completed.
 */
rtems_status_code
rtems_bdbuf_syncdev (rtems_disk_device *dd)
{
  if (rtems_bdbuf_tracer)
    printf ("bdbuf:syncdev: %08x\n", (unsigned) dd->dev);

  /*
   * Take the sync lock before locking the cache. Once we have the sync lock we
   * can lock the cache. If another thread has the sync lock it will cause this
   * thread to block until it owns the sync lock then it can own the cache. The
   * sync lock can only be obtained with the cache unlocked.
   */
  rtems_bdbuf_lock_sync ();
  rtems_bdbuf_lock_cache ();

  /*
   * Set the cache to have a sync active for a specific device and let the swap
   * out task know the id of the requester to wake when done.
   *
   * The swap out task will negate the sync active flag when no more buffers
   * for the device are held on the "modified for sync" queues.
   */
  bdbuf_cache.sync_active    = true;
  bdbuf_cache.sync_requester = rtems_task_self ();
  bdbuf_cache.sync_device    = dd;

  /* Kick the swapout task, then wait for it to report the sync finished. */
  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_unlock_cache ();
  rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
  rtems_bdbuf_unlock_sync ();

  return RTEMS_SUCCESSFUL;
}
2276
2277/**
2278 * Swapout transfer to the driver. The driver will break this I/O into groups
2279 * of consecutive write requests is multiple consecutive buffers are required
2280 * by the driver. The cache is not locked.
2281 *
2282 * @param transfer The transfer transaction.
2283 */
2284static void
2285rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2286{
2287  rtems_chain_node *node;
2288
2289  if (rtems_bdbuf_tracer)
2290    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dd->dev);
2291
2292  /*
2293   * If there are buffers to transfer to the media transfer them.
2294   */
2295  if (!rtems_chain_is_empty (&transfer->bds))
2296  {
2297    /*
2298     * The last block number used when the driver only supports
2299     * continuous blocks in a single request.
2300     */
2301    uint32_t last_block = 0;
2302
2303    rtems_disk_device *dd = transfer->dd;
2304    uint32_t media_blocks_per_block = dd->media_blocks_per_block;
2305    bool need_continuous_blocks =
2306      (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) != 0;
2307
2308    /*
2309     * Take as many buffers as configured and pass to the driver. Note, the
2310     * API to the drivers has an array of buffers and if a chain was passed
2311     * we could have just passed the list. If the driver API is updated it
2312     * should be possible to make this change with little effect in this
2313     * code. The array that is passed is broken in design and should be
2314     * removed. Merging members of a struct into the first member is
2315     * trouble waiting to happen.
2316     */
2317    transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2318    transfer->write_req->bufnum = 0;
2319
2320    while ((node = rtems_chain_get_unprotected(&transfer->bds)) != NULL)
2321    {
2322      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2323      bool                write = false;
2324
2325      /*
2326       * If the device only accepts sequential buffers and this is not the
2327       * first buffer (the first is always sequential, and the buffer is not
2328       * sequential then put the buffer back on the transfer chain and write
2329       * the committed buffers.
2330       */
2331
2332      if (rtems_bdbuf_tracer)
2333        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2334                bd->block, transfer->write_req->bufnum,
2335                need_continuous_blocks ? "MULTI" : "SCAT");
2336
2337      if (need_continuous_blocks && transfer->write_req->bufnum &&
2338          bd->block != last_block + media_blocks_per_block)
2339      {
2340        rtems_chain_prepend_unprotected (&transfer->bds, &bd->link);
2341        write = true;
2342      }
2343      else
2344      {
2345        rtems_blkdev_sg_buffer* buf;
2346        buf = &transfer->write_req->bufs[transfer->write_req->bufnum];
2347        transfer->write_req->bufnum++;
2348        buf->user   = bd;
2349        buf->block  = bd->block;
2350        buf->length = dd->block_size;
2351        buf->buffer = bd->buffer;
2352        last_block  = bd->block;
2353      }
2354
2355      /*
2356       * Perform the transfer if there are no more buffers, or the transfer
2357       * size has reached the configured max. value.
2358       */
2359
2360      if (rtems_chain_is_empty (&transfer->bds) ||
2361          (transfer->write_req->bufnum >= bdbuf_config.max_write_blocks))
2362        write = true;
2363
2364      if (write)
2365      {
2366        rtems_bdbuf_execute_transfer_request (dd, transfer->write_req, false);
2367
2368        transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
2369        transfer->write_req->bufnum = 0;
2370      }
2371    }
2372
2373    /*
2374     * If sync'ing and the deivce is capability of handling a sync IO control
2375     * call perform the call.
2376     */
2377    if (transfer->syncing &&
2378        (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
2379    {
2380      /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
2381      /* How should the error be handled ? */
2382    }
2383  }
2384}
2385
2386/**
2387 * Process the modified list of buffers. There is a sync or modified list that
2388 * needs to be handled so we have a common function to do the work.
2389 *
2390 * @param dd_ptr Pointer to the device to handle. If BDBUF_INVALID_DEV no
2391 * device is selected so select the device of the first buffer to be written to
2392 * disk.
2393 * @param chain The modified chain to process.
2394 * @param transfer The chain to append buffers to be written too.
2395 * @param sync_active If true this is a sync operation so expire all timers.
2396 * @param update_timers If true update the timers.
2397 * @param timer_delta It update_timers is true update the timers by this
2398 *                    amount.
2399 */
2400static void
2401rtems_bdbuf_swapout_modified_processing (rtems_disk_device  **dd_ptr,
2402                                         rtems_chain_control* chain,
2403                                         rtems_chain_control* transfer,
2404                                         bool                 sync_active,
2405                                         bool                 update_timers,
2406                                         uint32_t             timer_delta)
2407{
2408  if (!rtems_chain_is_empty (chain))
2409  {
2410    rtems_chain_node* node = rtems_chain_head (chain);
2411    bool              sync_all;
2412   
2413    node = node->next;
2414
2415    /*
2416     * A sync active with no valid dev means sync all.
2417     */
2418    if (sync_active && (*dd_ptr == BDBUF_INVALID_DEV))
2419      sync_all = true;
2420    else
2421      sync_all = false;
2422   
2423    while (!rtems_chain_is_tail (chain, node))
2424    {
2425      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2426
2427      /*
2428       * Check if the buffer's hold timer has reached 0. If a sync is active
2429       * or someone waits for a buffer written force all the timers to 0.
2430       *
2431       * @note Lots of sync requests will skew this timer. It should be based
2432       *       on TOD to be accurate. Does it matter ?
2433       */
2434      if (sync_all || (sync_active && (*dd_ptr == bd->dd))
2435          || rtems_bdbuf_has_buffer_waiters ())
2436        bd->hold_timer = 0;
2437
2438      if (bd->hold_timer)
2439      {
2440        if (update_timers)
2441        {
2442          if (bd->hold_timer > timer_delta)
2443            bd->hold_timer -= timer_delta;
2444          else
2445            bd->hold_timer = 0;
2446        }
2447
2448        if (bd->hold_timer)
2449        {
2450          node = node->next;
2451          continue;
2452        }
2453      }
2454
2455      /*
2456       * This assumes we can set it to BDBUF_INVALID_DEV which is just an
2457       * assumption. Cannot use the transfer list being empty the sync dev
2458       * calls sets the dev to use.
2459       */
2460      if (*dd_ptr == BDBUF_INVALID_DEV)
2461        *dd_ptr = bd->dd;
2462
2463      if (bd->dd == *dd_ptr)
2464      {
2465        rtems_chain_node* next_node = node->next;
2466        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2467
2468        /*
2469         * The blocks on the transfer list are sorted in block order. This
2470         * means multi-block transfers for drivers that require consecutive
2471         * blocks perform better with sorted blocks and for real disks it may
2472         * help lower head movement.
2473         */
2474
2475        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2476
2477        rtems_chain_extract_unprotected (node);
2478
2479        tnode = tnode->previous;
2480
2481        while (node && !rtems_chain_is_head (transfer, tnode))
2482        {
2483          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2484
2485          if (bd->block > tbd->block)
2486          {
2487            rtems_chain_insert_unprotected (tnode, node);
2488            node = NULL;
2489          }
2490          else
2491            tnode = tnode->previous;
2492        }
2493
2494        if (node)
2495          rtems_chain_prepend_unprotected (transfer, node);
2496
2497        node = next_node;
2498      }
2499      else
2500      {
2501        node = node->next;
2502      }
2503    }
2504  }
2505}
2506
2507/**
2508 * Process the cache's modified buffers. Check the sync list first then the
2509 * modified list extracting the buffers suitable to be written to disk. We have
2510 * a device at a time. The task level loop will repeat this operation while
2511 * there are buffers to be written. If the transfer fails place the buffers
2512 * back on the modified list and try again later. The cache is unlocked while
2513 * the buffers are being written to disk.
2514 *
2515 * @param timer_delta It update_timers is true update the timers by this
2516 *                    amount.
2517 * @param update_timers If true update the timers.
2518 * @param transfer The transfer transaction data.
2519 *
2520 * @retval true Buffers where written to disk so scan again.
2521 * @retval false No buffers where written to disk.
2522 */
2523static bool
2524rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2525                                bool                          update_timers,
2526                                rtems_bdbuf_swapout_transfer* transfer)
2527{
2528  rtems_bdbuf_swapout_worker* worker;
2529  bool                        transfered_buffers = false;
2530
2531  rtems_bdbuf_lock_cache ();
2532
2533  /*
2534   * If a sync is active do not use a worker because the current code does not
2535   * cleaning up after. We need to know the buffers have been written when
2536   * syncing to release sync lock and currently worker threads do not return to
2537   * here. We do not know the worker is the last in a sequence of sync writes
2538   * until after we have it running so we do not know to tell it to release the
2539   * lock. The simplest solution is to get the main swap out task perform all
2540   * sync operations.
2541   */
2542  if (bdbuf_cache.sync_active)
2543    worker = NULL;
2544  else
2545  {
2546    worker = (rtems_bdbuf_swapout_worker*)
2547      rtems_chain_get_unprotected (&bdbuf_cache.swapout_workers);
2548    if (worker)
2549      transfer = &worker->transfer;
2550  }
2551
2552  rtems_chain_initialize_empty (&transfer->bds);
2553  transfer->dd = BDBUF_INVALID_DEV;
2554  transfer->syncing = bdbuf_cache.sync_active;
2555 
2556  /*
2557   * When the sync is for a device limit the sync to that device. If the sync
2558   * is for a buffer handle process the devices in the order on the sync
2559   * list. This means the dev is BDBUF_INVALID_DEV.
2560   */
2561  if (bdbuf_cache.sync_active)
2562    transfer->dd = bdbuf_cache.sync_device;
2563   
2564  /*
2565   * If we have any buffers in the sync queue move them to the modified
2566   * list. The first sync buffer will select the device we use.
2567   */
2568  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2569                                           &bdbuf_cache.sync,
2570                                           &transfer->bds,
2571                                           true, false,
2572                                           timer_delta);
2573
2574  /*
2575   * Process the cache's modified list.
2576   */
2577  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2578                                           &bdbuf_cache.modified,
2579                                           &transfer->bds,
2580                                           bdbuf_cache.sync_active,
2581                                           update_timers,
2582                                           timer_delta);
2583
2584  /*
2585   * We have all the buffers that have been modified for this device so the
2586   * cache can be unlocked because the state of each buffer has been set to
2587   * TRANSFER.
2588   */
2589  rtems_bdbuf_unlock_cache ();
2590
2591  /*
2592   * If there are buffers to transfer to the media transfer them.
2593   */
2594  if (!rtems_chain_is_empty (&transfer->bds))
2595  {
2596    if (worker)
2597    {
2598      rtems_status_code sc = rtems_event_send (worker->id,
2599                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2600      if (sc != RTEMS_SUCCESSFUL)
2601        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
2602    }
2603    else
2604    {
2605      rtems_bdbuf_swapout_write (transfer);
2606    }
2607
2608    transfered_buffers = true;
2609  }
2610
2611  if (bdbuf_cache.sync_active && !transfered_buffers)
2612  {
2613    rtems_id sync_requester;
2614    rtems_bdbuf_lock_cache ();
2615    sync_requester = bdbuf_cache.sync_requester;
2616    bdbuf_cache.sync_active = false;
2617    bdbuf_cache.sync_requester = 0;
2618    rtems_bdbuf_unlock_cache ();
2619    if (sync_requester)
2620      rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC);
2621  }
2622
2623  return transfered_buffers;
2624}
2625
2626/**
2627 * Allocate the write request and initialise it for good measure.
2628 *
2629 * @return rtems_blkdev_request* The write reference memory.
2630 */
2631static rtems_blkdev_request*
2632rtems_bdbuf_swapout_writereq_alloc (void)
2633{
2634  /*
2635   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
2636   * I am disappointment at finding code like this in RTEMS. The request should
2637   * have been a rtems_chain_control. Simple, fast and less storage as the node
2638   * is already part of the buffer structure.
2639   */
2640  rtems_blkdev_request* write_req =
2641    malloc (sizeof (rtems_blkdev_request) +
2642            (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer)));
2643
2644  if (!write_req)
2645    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2646
2647  write_req->req = RTEMS_BLKDEV_REQ_WRITE;
2648  write_req->req_done = rtems_bdbuf_transfer_done;
2649  write_req->done_arg = write_req;
2650  write_req->io_task = rtems_task_self ();
2651
2652  return write_req;
2653}
2654
2655/**
2656 * The swapout worker thread body.
2657 *
2658 * @param arg A pointer to the worker thread's private data.
2659 * @return rtems_task Not used.
2660 */
2661static rtems_task
2662rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2663{
2664  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2665
2666  while (worker->enabled)
2667  {
2668    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2669
2670    rtems_bdbuf_swapout_write (&worker->transfer);
2671
2672    rtems_bdbuf_lock_cache ();
2673
2674    rtems_chain_initialize_empty (&worker->transfer.bds);
2675    worker->transfer.dd = BDBUF_INVALID_DEV;
2676
2677    rtems_chain_append_unprotected (&bdbuf_cache.swapout_workers, &worker->link);
2678
2679    rtems_bdbuf_unlock_cache ();
2680  }
2681
2682  free (worker->transfer.write_req);
2683  free (worker);
2684
2685  rtems_task_delete (RTEMS_SELF);
2686}
2687
2688/**
2689 * Open the swapout worker threads.
2690 */
2691static void
2692rtems_bdbuf_swapout_workers_open (void)
2693{
2694  rtems_status_code sc;
2695  size_t            w;
2696
2697  rtems_bdbuf_lock_cache ();
2698
2699  for (w = 0; w < bdbuf_config.swapout_workers; w++)
2700  {
2701    rtems_bdbuf_swapout_worker* worker;
2702
2703    worker = malloc (sizeof (rtems_bdbuf_swapout_worker));
2704    if (!worker)
2705      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2706
2707    rtems_chain_append_unprotected (&bdbuf_cache.swapout_workers, &worker->link);
2708    worker->enabled = true;
2709    worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2710
2711    rtems_chain_initialize_empty (&worker->transfer.bds);
2712    worker->transfer.dd = BDBUF_INVALID_DEV;
2713
2714    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'D', 'o', 'a' + w),
2715                                  bdbuf_config.swapout_worker_priority,
2716                                  RTEMS_BDBUF_SWAPOUT_WORKER_TASK_PRIORITY_DEFAULT,
2717                                  rtems_bdbuf_swapout_worker_task,
2718                                  (rtems_task_argument) worker,
2719                                  &worker->id);
2720    if (sc != RTEMS_SUCCESSFUL)
2721      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE);
2722  }
2723
2724  rtems_bdbuf_unlock_cache ();
2725}
2726
2727/**
2728 * Close the swapout worker threads.
2729 */
2730static void
2731rtems_bdbuf_swapout_workers_close (void)
2732{
2733  rtems_chain_node* node;
2734
2735  rtems_bdbuf_lock_cache ();
2736
2737  node = rtems_chain_first (&bdbuf_cache.swapout_workers);
2738  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node))
2739  {
2740    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2741    worker->enabled = false;
2742    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2743    node = rtems_chain_next (node);
2744  }
2745
2746  rtems_bdbuf_unlock_cache ();
2747}
2748
2749/**
2750 * Body of task which takes care on flushing modified buffers to the disk.
2751 *
2752 * @param arg A pointer to the global cache data. Use the global variable and
2753 *            not this.
2754 * @return rtems_task Not used.
2755 */
2756static rtems_task
2757rtems_bdbuf_swapout_task (rtems_task_argument arg)
2758{
2759  rtems_bdbuf_swapout_transfer transfer;
2760  uint32_t                     period_in_ticks;
2761  const uint32_t               period_in_msecs = bdbuf_config.swapout_period;
2762  uint32_t                     timer_delta;
2763
2764  transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
2765  rtems_chain_initialize_empty (&transfer.bds);
2766  transfer.dd = BDBUF_INVALID_DEV;
2767  transfer.syncing = false;
2768
2769  /*
2770   * Localise the period.
2771   */
2772  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2773
2774  /*
2775   * This is temporary. Needs to be changed to use the real time clock.
2776   */
2777  timer_delta = period_in_msecs;
2778
2779  /*
2780   * Create the worker threads.
2781   */
2782  rtems_bdbuf_swapout_workers_open ();
2783
2784  while (bdbuf_cache.swapout_enabled)
2785  {
2786    rtems_event_set   out;
2787    rtems_status_code sc;
2788
2789    /*
2790     * Only update the timers once in the processing cycle.
2791     */
2792    bool update_timers = true;
2793
2794    /*
2795     * If we write buffers to any disk perform a check again. We only write a
2796     * single device at a time and the cache may have more than one device's
2797     * buffers modified waiting to be written.
2798     */
2799    bool transfered_buffers;
2800
2801    do
2802    {
2803      transfered_buffers = false;
2804
2805      /*
2806       * Extact all the buffers we find for a specific device. The device is
2807       * the first one we find on a modified list. Process the sync queue of
2808       * buffers first.
2809       */
2810      if (rtems_bdbuf_swapout_processing (timer_delta,
2811                                          update_timers,
2812                                          &transfer))
2813      {
2814        transfered_buffers = true;
2815      }
2816
2817      /*
2818       * Only update the timers once.
2819       */
2820      update_timers = false;
2821    }
2822    while (transfered_buffers);
2823
2824    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2825                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2826                              period_in_ticks,
2827                              &out);
2828
2829    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2830      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2831  }
2832
2833  rtems_bdbuf_swapout_workers_close ();
2834
2835  free (transfer.write_req);
2836
2837  rtems_task_delete (RTEMS_SELF);
2838}
2839
2840static void
2841rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2842{
2843  bool wake_buffer_waiters = false;
2844  rtems_chain_node *node = NULL;
2845
2846  while ((node = rtems_chain_get_unprotected (purge_list)) != NULL)
2847  {
2848    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
2849
2850    if (bd->waiters == 0)
2851      wake_buffer_waiters = true;
2852
2853    rtems_bdbuf_discard_buffer (bd);
2854  }
2855
2856  if (wake_buffer_waiters)
2857    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2858}
2859
/**
 * Walk the whole AVL tree of buffers and collect every buffer of the given
 * device onto the purge list. Buffers currently in transfer or access states
 * cannot be removed immediately; they are flagged with the corresponding
 * *_PURGED state so they are discarded when the transfer or access
 * completes. The caller must hold the cache lock.
 *
 * The traversal is pre-order using an explicit parent stack bounded by the
 * maximum AVL tree height.
 *
 * @param purge_list Chain the collected buffers are appended to.
 * @param dd The device whose buffers are purged.
 */
static void
rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
                              const rtems_disk_device *dd)
{
  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer **prev = stack;
  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;

  *prev = NULL;

  while (cur != NULL)
  {
    if (cur->dd == dd)
    {
      switch (cur->state)
      {
        case RTEMS_BDBUF_STATE_FREE:
        case RTEMS_BDBUF_STATE_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
          /* Nothing to do: not cached data, or already marked purged. */
          break;
        case RTEMS_BDBUF_STATE_SYNC:
          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
          /* Fall through */
        case RTEMS_BDBUF_STATE_MODIFIED:
          rtems_bdbuf_group_release (cur);
          /* Fall through */
        case RTEMS_BDBUF_STATE_CACHED:
          /* Idle buffer: move it straight onto the purge list. */
          rtems_chain_extract_unprotected (&cur->link);
          rtems_chain_append_unprotected (purge_list, &cur->link);
          break;
        case RTEMS_BDBUF_STATE_TRANSFER:
          /* In flight: mark so it is discarded when the transfer is done. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
          break;
        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
          /* Held by a user: mark so it is discarded on release. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
          break;
        default:
          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_STATE_11);
      }
    }

    if (cur->avl.left != NULL)
    {
      /* Left */
      ++prev;
      *prev = cur;
      cur = cur->avl.left;
    }
    else if (cur->avl.right != NULL)
    {
      /* Right */
      ++prev;
      *prev = cur;
      cur = cur->avl.right;
    }
    else
    {
      while (*prev != NULL
             && (cur == (*prev)->avl.right || (*prev)->avl.right == NULL))
      {
        /* Up */
        cur = *prev;
        --prev;
      }
      if (*prev != NULL)
        /* Right */
        cur = (*prev)->avl.right;
      else
        /* Finished */
        cur = NULL;
    }
  }
}
2936
2937void
2938rtems_bdbuf_purge_dev (rtems_disk_device *dd)
2939{
2940  rtems_chain_control purge_list;
2941
2942  rtems_chain_initialize_empty (&purge_list);
2943  rtems_bdbuf_lock_cache ();
2944  rtems_bdbuf_read_ahead_reset (dd);
2945  rtems_bdbuf_gather_for_purge (&purge_list, dd);
2946  rtems_bdbuf_purge_list (&purge_list);
2947  rtems_bdbuf_unlock_cache ();
2948}
2949
2950rtems_status_code
2951rtems_bdbuf_set_block_size (rtems_disk_device *dd, uint32_t block_size)
2952{
2953  rtems_status_code sc = RTEMS_SUCCESSFUL;
2954
2955  rtems_bdbuf_lock_cache ();
2956
2957  if (block_size > 0)
2958  {
2959    size_t bds_per_group = rtems_bdbuf_bds_per_group (block_size);
2960
2961    if (bds_per_group != 0)
2962    {
2963      int block_to_media_block_shift = 0;
2964      uint32_t media_blocks_per_block = block_size / dd->media_block_size;
2965      uint32_t one = 1;
2966
2967      while ((one << block_to_media_block_shift) < media_blocks_per_block)
2968      {
2969        ++block_to_media_block_shift;
2970      }
2971
2972      if ((dd->media_block_size << block_to_media_block_shift) != block_size)
2973        block_to_media_block_shift = -1;
2974
2975      dd->block_size = block_size;
2976      dd->block_count = dd->size / media_blocks_per_block;
2977      dd->media_blocks_per_block = media_blocks_per_block;
2978      dd->block_to_media_block_shift = block_to_media_block_shift;
2979      dd->bds_per_group = bds_per_group;
2980
2981      rtems_bdbuf_read_ahead_reset (dd);
2982    }
2983    else
2984    {
2985      sc = RTEMS_INVALID_NUMBER;
2986    }
2987  }
2988  else
2989  {
2990    sc = RTEMS_INVALID_NUMBER;
2991  }
2992
2993  rtems_bdbuf_unlock_cache ();
2994
2995  return sc;
2996}
2997
/**
 * Body of the read-ahead task. Waits for a wake-up event, then services each
 * device queued on the read-ahead chain: translates the next block to a
 * media block, obtains a buffer and issues a multi-block read of up to the
 * configured maximum. While a transfer covers less than the maximum the
 * trigger is re-armed mid-window to keep sequential readers streaming.
 *
 * @param arg Not used.
 * @return rtems_task Not used.
 */
static rtems_task
rtems_bdbuf_read_ahead_task (rtems_task_argument arg)
{
  rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;

  while (bdbuf_cache.read_ahead_enabled)
  {
    rtems_chain_node *node;

    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
    rtems_bdbuf_lock_cache ();

    while ((node = rtems_chain_get_unprotected (chain)) != NULL)
    {
      /* Recover the device from its embedded read-ahead chain node. */
      rtems_disk_device *dd = (rtems_disk_device *)
        ((char *) node - offsetof (rtems_disk_device, read_ahead.node));
      rtems_blkdev_bnum block = dd->read_ahead.next;
      rtems_blkdev_bnum media_block = 0;
      rtems_status_code sc =
        rtems_bdbuf_get_media_block (dd, block, &media_block);

      rtems_chain_set_off_chain (&dd->read_ahead.node);

      if (sc == RTEMS_SUCCESSFUL)
      {
        rtems_bdbuf_buffer *bd =
          rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

        if (bd != NULL)
        {
          uint32_t transfer_count = dd->block_count - block;
          uint32_t max_transfer_count = bdbuf_config.max_read_ahead_blocks;

          if (transfer_count >= max_transfer_count)
          {
            /* Full-size window: re-arm the trigger half way through it. */
            transfer_count = max_transfer_count;
            dd->read_ahead.trigger = block + transfer_count / 2;
            dd->read_ahead.next = block + transfer_count;
          }
          else
          {
            /* End of device reached: stop read-ahead for this device. */
            dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
          }

          ++dd->stats.read_ahead_transfers;
          rtems_bdbuf_execute_read_request (dd, bd, transfer_count);
        }
      }
      else
      {
        /* Block translation failed: disarm the trigger. */
        dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
      }
    }

    rtems_bdbuf_unlock_cache ();
  }

  rtems_task_delete (RTEMS_SELF);
}
3057
/**
 * Copy a consistent snapshot of the device's I/O statistics. The cache lock
 * is held during the copy so the counters are not updated concurrently.
 *
 * @param dd The disk device.
 * @param stats Receives the statistics.
 */
void rtems_bdbuf_get_device_stats (const rtems_disk_device *dd,
                                   rtems_blkdev_stats      *stats)
{
  rtems_bdbuf_lock_cache ();
  *stats = dd->stats;
  rtems_bdbuf_unlock_cache ();
}
3065
/**
 * Reset the device's I/O statistics to zero. The cache lock is held during
 * the reset so the counters are not updated concurrently.
 *
 * @param dd The disk device.
 */
void rtems_bdbuf_reset_device_stats (rtems_disk_device *dd)
{
  rtems_bdbuf_lock_cache ();
  memset (&dd->stats, 0, sizeof(dd->stats));
  rtems_bdbuf_unlock_cache ();
}
Note: See TracBrowser for help on using the repository browser.