source: rtems/cpukit/libblock/src/bdbuf.c @ 71092f7

4.115
Last change on this file since 71092f7 was 71092f7, checked in by Sebastian Huber <sebastian.huber@…>, on 07/02/12 at 14:58:01

libblock: Fix read-ahead trigger and next update

The previous version was sub-optimal for read-ahead transfer counts of
one.

  • Property mode set to 100644
File size: 83.5 KB
RevLine 
[57aa979]1/**
2 * @file
3 *
[4670d91]4 * @ingroup rtems_bdbuf
5 *
[57aa979]6 * Block device buffer management.
7 */
8
[e51bd96]9/*
10 * Disk I/O buffering
11 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
[df6348bb]16 *         Alexander Kukuta <kam@oktet.ru>
[e51bd96]17 *
[0d15414e]18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
[c21c850e]19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
[6d612944]21 *
[796967c]22 * Copyright (c) 2009-2012 embedded brains GmbH.
[18daff9]23 *
[3d14a45]24 * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp
[e51bd96]25 */
26
[3899a537]27/**
28 * Set to 1 to enable debug tracing.
29 */
30#define RTEMS_BDBUF_TRACE 0
31
[006fa1ef]32#if HAVE_CONFIG_H
33#include "config.h"
34#endif
[b5b07cad]35#include <limits.h>
36#include <errno.h>
37#include <stdio.h>
38#include <string.h>
[253c3a1d]39#include <inttypes.h>
40
[e51bd96]41#include <rtems.h>
[3899a537]42#include <rtems/error.h>
[57aa979]43#include <rtems/malloc.h>
[0ebfac19]44
[3899a537]45#include "rtems/bdbuf.h"
[e51bd96]46
[796967c]47#define BDBUF_INVALID_DEV NULL
[b5b07cad]48
[0d15414e]49/*
50 * Simpler label for this file.
51 */
52#define bdbuf_config rtems_bdbuf_configuration
53
/**
 * A swapout transfer transaction data. This data is passed to a worker thread
 * to handle the write phase of the transfer.
 */
typedef struct rtems_bdbuf_swapout_transfer
{
  rtems_chain_control   bds;         /**< The transfer list of BDs. */
  rtems_disk_device    *dd;          /**< The device the transfer is for. */
  bool                  syncing;     /**< The data is a sync'ing. */
  rtems_blkdev_request* write_req;   /**< The write request array. */
} rtems_bdbuf_swapout_transfer;
65
/**
 * Swapout worker thread. These are available to take processing from the
 * main swapout thread and handle the I/O operation.
 */
typedef struct rtems_bdbuf_swapout_worker
{
  rtems_chain_node             link;     /**< The threads sit on a chain when
                                          * idle. */
  rtems_id                     id;       /**< The id of the task so we can wake
                                          * it. */
  bool                         enabled;  /**< The worker is enabled. */
  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
                                          * thread. */
} rtems_bdbuf_swapout_worker;
80
/**
 * Buffer waiters synchronization.
 */
typedef struct rtems_bdbuf_waiters {
  unsigned count;   /**< Number of tasks currently blocked on the semaphore;
                     * lets the waker skip the flush when no one waits. */
  rtems_id sema;    /**< The semaphore the waiting tasks block on. */
} rtems_bdbuf_waiters;
88
/**
 * The BD buffer cache. There is a single instance of this structure
 * (bdbuf_cache) shared by all devices.
 */
typedef struct rtems_bdbuf_cache
{
  rtems_id            swapout;           /**< Swapout task ID */
  bool                swapout_enabled;   /**< Swapout is only running if
                                          * enabled. Set to false to kill the
                                          * swap out task. It deletes itself. */
  rtems_chain_control swapout_workers;   /**< The work threads for the swapout
                                          * task. */

  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
                                          * descriptors. */
  void*               buffers;           /**< The buffer's memory. */
  size_t              buffer_min_count;  /**< Number of minimum size buffers
                                          * that fit the buffer memory. */
  size_t              max_bds_per_group; /**< The number of BDs of minimum
                                          * buffer size that fit in a group. */
  uint32_t            flags;             /**< Configuration flags. */

  rtems_id            lock;              /**< The cache lock. It locks all
                                          * cache data, BD and lists. */
  rtems_id            sync_lock;         /**< Sync calls block writes. */
  bool                sync_active;       /**< True if a sync is active. */
  rtems_id            sync_requester;    /**< The sync requester. */
  rtems_disk_device  *sync_device;       /**< The device to sync and
                                          * BDBUF_INVALID_DEV not a device
                                          * sync. */

  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
                                          * root. There is only one. */
  rtems_chain_control lru;               /**< Least recently used list */
  rtems_chain_control modified;          /**< Modified buffers list */
  rtems_chain_control sync;              /**< Buffers to sync list */

  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
                                          * ACCESS_CACHED, ACCESS_MODIFIED or
                                          * ACCESS_EMPTY
                                          * state. */
  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
                                          * state. */
  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
                                          * available. */

  size_t              group_count;       /**< The number of groups. */
  rtems_bdbuf_group*  groups;            /**< The groups. */
  rtems_id            read_ahead_task;   /**< Read-ahead task */
  rtems_chain_control read_ahead_chain;  /**< Read-ahead request chain */
  bool                read_ahead_enabled; /**< Read-ahead enabled */

  bool                initialised;       /**< Initialised state. */
} rtems_bdbuf_cache;
[3899a537]142
143/**
144 * Fatal errors
[0ebfac19]145 */
[c21c850e]146#define RTEMS_BLKDEV_FATAL_ERROR(n) \
147  (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF))
[3899a537]148
[e7fb54e]149#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_11      RTEMS_BLKDEV_FATAL_ERROR(1)
[6d612944]150#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_4       RTEMS_BLKDEV_FATAL_ERROR(2)
151#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_5       RTEMS_BLKDEV_FATAL_ERROR(3)
152#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_6       RTEMS_BLKDEV_FATAL_ERROR(4)
153#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_7       RTEMS_BLKDEV_FATAL_ERROR(5)
154#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_8       RTEMS_BLKDEV_FATAL_ERROR(6)
155#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_9       RTEMS_BLKDEV_FATAL_ERROR(7)
156#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_10      RTEMS_BLKDEV_FATAL_ERROR(8)
[5c587596]157#define RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM       RTEMS_BLKDEV_FATAL_ERROR(9)
[945884fe]158#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT       RTEMS_BLKDEV_FATAL_ERROR(10)
[3d60c1b]159
160/*
161 * The lock/unlock fatal errors occur in case the bdbuf is not initialized with
162 * rtems_bdbuf_init().  General system corruption like stack overflow etc. may
163 * also trigger these fatal errors.
164 */
[945884fe]165#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK     RTEMS_BLKDEV_FATAL_ERROR(11)
166#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK   RTEMS_BLKDEV_FATAL_ERROR(12)
167#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK    RTEMS_BLKDEV_FATAL_ERROR(13)
168#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK  RTEMS_BLKDEV_FATAL_ERROR(14)
[3d60c1b]169
[b5b07cad]170#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS   RTEMS_BLKDEV_FATAL_ERROR(15)
[945884fe]171#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2  RTEMS_BLKDEV_FATAL_ERROR(16)
[b5b07cad]172#define RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST   RTEMS_BLKDEV_FATAL_ERROR(17)
[eb649786]173#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO RTEMS_BLKDEV_FATAL_ERROR(18)
174#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE    RTEMS_BLKDEV_FATAL_ERROR(19)
175#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE       RTEMS_BLKDEV_FATAL_ERROR(20)
176#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM      RTEMS_BLKDEV_FATAL_ERROR(21)
177#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE  RTEMS_BLKDEV_FATAL_ERROR(22)
178#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE          RTEMS_BLKDEV_FATAL_ERROR(24)
179#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS          RTEMS_BLKDEV_FATAL_ERROR(25)
[6d612944]180#define RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT     RTEMS_BLKDEV_FATAL_ERROR(26)
181#define RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE       RTEMS_BLKDEV_FATAL_ERROR(27)
182#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_0       RTEMS_BLKDEV_FATAL_ERROR(28)
183#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_1       RTEMS_BLKDEV_FATAL_ERROR(29)
184#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_2       RTEMS_BLKDEV_FATAL_ERROR(30)
[39ee704e]185#define RTEMS_BLKDEV_FATAL_BDBUF_RA_WAKE_UP    RTEMS_BLKDEV_FATAL_ERROR(31)
[3899a537]186
[c21c850e]187/**
188 * The events used in this code. These should be system events rather than
189 * application events.
190 */
[3899a537]191#define RTEMS_BDBUF_TRANSFER_SYNC  RTEMS_EVENT_1
192#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
[39ee704e]193#define RTEMS_BDBUF_READ_AHEAD_WAKE_UP RTEMS_EVENT_1
[3899a537]194
195/**
196 * Lock semaphore attributes. This is used for locking type mutexes.
[c21c850e]197 *
198 * @warning Priority inheritance is on.
[3899a537]199 */
[0d15414e]200#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
[3899a537]201  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
202   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
[048dcd2b]203
[3899a537]204/**
205 * Waiter semaphore attributes.
206 *
[c21c850e]207 * @warning Do not configure as inherit priority. If a driver is in the driver
208 *          initialisation table this locked semaphore will have the IDLE task
209 *          as the holder and a blocking task will raise the priority of the
210 *          IDLE task which can cause unsual side effects.
[3899a537]211 */
[0d15414e]212#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
[c21c850e]213  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
[3899a537]214   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
[e51bd96]215
[eb649786]216/**
217 * Waiter timeout. Set to non-zero to find some info on a waiter that is
218 * waiting too long.
219 */
220#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
221#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
222#define RTEMS_BDBUF_WAIT_TIMEOUT \
223  (TOD_MICROSECONDS_TO_TICKS (20000000))
224#endif
225
[3899a537]226static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
[048dcd2b]227
[39ee704e]228static rtems_task rtems_bdbuf_read_ahead_task(rtems_task_argument arg);
229
[3899a537]230/**
[0d15414e]231 * The Buffer Descriptor cache.
[3899a537]232 */
[0d15414e]233static rtems_bdbuf_cache bdbuf_cache;
[e51bd96]234
[eb649786]235#if RTEMS_BDBUF_TRACE
[3899a537]236/**
[eb649786]237 * If true output the trace message.
[3899a537]238 */
[4f971343]239bool rtems_bdbuf_tracer;
[eb649786]240
241/**
242 * Return the number of items on the list.
243 *
244 * @param list The chain control.
245 * @return uint32_t The number of items on the list.
246 */
247uint32_t
248rtems_bdbuf_list_count (rtems_chain_control* list)
[3899a537]249{
[eb649786]250  rtems_chain_node* node = rtems_chain_first (list);
251  uint32_t          count = 0;
252  while (!rtems_chain_is_tail (list, node))
[3899a537]253  {
[eb649786]254    count++;
255    node = rtems_chain_next (node);
[3899a537]256  }
[eb649786]257  return count;
258}
259
260/**
261 * Show the usage for the bdbuf cache.
262 */
263void
264rtems_bdbuf_show_usage (void)
265{
266  uint32_t group;
267  uint32_t total = 0;
268  uint32_t val;
[b5b07cad]269
[eb649786]270  for (group = 0; group < bdbuf_cache.group_count; group++)
271    total += bdbuf_cache.groups[group].users;
272  printf ("bdbuf:group users=%lu", total);
273  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
274  printf (", lru=%lu", val);
[6d612944]275  total = val;
[eb649786]276  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
277  printf (", mod=%lu", val);
278  total += val;
279  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
280  printf (", sync=%lu", val);
281  total += val;
282  printf (", total=%lu\n", total);
[3899a537]283}
[eb649786]284
285/**
286 * Show the users for a group of a bd.
287 *
288 * @param where A label to show the context of output.
289 * @param bd The bd to show the users of.
290 */
291void
292rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
293{
294  const char* states[] =
[e7096b1f]295    { "FR", "EM", "CH", "AC", "AM", "AE", "AP", "MD", "SY", "TR", "TP" };
[b5b07cad]296
297  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
[eb649786]298          where,
299          bd->block, states[bd->state],
300          bd->group - bdbuf_cache.groups,
301          bd - bdbuf_cache.bds,
302          bd->group->users,
303          bd->group->users > 8 ? "<<<<<<<" : "");
304}
305#else
306#define rtems_bdbuf_tracer (0)
[6d612944]307#define rtems_bdbuf_show_usage() ((void) 0)
308#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
[e51bd96]309#endif
310
[3899a537]311/**
312 * The default maximum height of 32 allows for AVL trees having between
313 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
314 * change this compile-time constant as you wish.
315 */
316#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
317#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
[e51bd96]318#endif
319
[6d612944]320static void
321rtems_bdbuf_fatal (rtems_bdbuf_buf_state state, uint32_t error)
322{
323  rtems_fatal_error_occurred ((((uint32_t) state) << 16) | error);
324}
325
[3899a537]326/**
[796967c]327 * Searches for the node with specified dd/block.
[e51bd96]328 *
[3899a537]329 * @param root pointer to the root node of the AVL-Tree
[796967c]330 * @param dd disk device search key
[3899a537]331 * @param block block search key
[796967c]332 * @retval NULL node with the specified dd/block is not found
333 * @return pointer to the node with specified dd/block
[e51bd96]334 */
[3899a537]335static rtems_bdbuf_buffer *
336rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
[796967c]337                        const rtems_disk_device *dd,
[3899a537]338                        rtems_blkdev_bnum    block)
[e51bd96]339{
[3899a537]340  rtems_bdbuf_buffer* p = *root;
[df6348bb]341
[796967c]342  while ((p != NULL) && ((p->dd != dd) || (p->block != block)))
[3899a537]343  {
[796967c]344    if (((uintptr_t) p->dd < (uintptr_t) dd)
345        || ((p->dd == dd) && (p->block < block)))
[e51bd96]346    {
[3899a537]347      p = p->avl.right;
348    }
349    else
350    {
351      p = p->avl.left;
[e51bd96]352    }
[3899a537]353  }
[048dcd2b]354
[3899a537]355  return p;
[e51bd96]356}
357
/**
 * Inserts the specified node to the AVL-Tree.
 *
 * Nodes are ordered first by device address, then by block number. The path
 * from the root to the insertion point is recorded in an explicit stack and
 * each node's avl.cache field remembers the direction taken (1 = right,
 * -1 = left) so the tree can be rebalanced without parent pointers.
 *
 * @param root pointer to the root node of the AVL-Tree
 * @param node Pointer to the node to add.
 * @retval 0 The node added successfully
 * @retval -1 An error occurred (a node with the same dd/block already exists)
 */
static int
rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
                       rtems_bdbuf_buffer*  node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  /* Empty tree: the new node simply becomes the root. */
  if (p == NULL)
  {
    *root = node;
    node->avl.left = NULL;
    node->avl.right = NULL;
    node->avl.bal = 0;
    return 0;
  }

  /*
   * Descend to the insertion point, pushing each visited node on the stack
   * and recording the branch taken in avl.cache.
   */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      p->avl.cache = 1;
      q = p->avl.right;
      if (q == NULL)
      {
        /* NOTE(review): this assignment is redundant; the combined
         * assignment on the next line already sets q to node. */
        q = node;
        p->avl.right = q = node;
        break;
      }
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      p->avl.cache = -1;
      q = p->avl.left;
      if (q == NULL)
      {
        q = node;
        p->avl.left = q;
        break;
      }
    }
    else
    {
      /* Duplicate key: a node with this dd/block is already in the tree. */
      return -1;
    }

    p = q;
  }

  /* Initialise the freshly linked leaf. */
  q->avl.left = q->avl.right = NULL;
  q->avl.bal = 0;
  modified = true;
  buf_prev--;

  /*
   * Walk back up the recorded path, updating balance factors and applying a
   * single or double rotation where a subtree has become too tall. The loop
   * stops as soon as a subtree's height is unchanged (modified == false).
   */
  while (modified)
  {
    if (p->avl.cache == -1)
    {
      /* The new node went into the left subtree of p. */
      switch (p->avl.bal)
      {
        case 1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = -1;
          break;

        case -1:
          p1 = p->avl.left;
          if (p1->avl.bal == -1) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;
            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;
            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    else
    {
      /* The new node went into the right subtree of p. */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = 1;
          break;

        case 1:
          p1 = p->avl.right;
          if (p1->avl.bal == 1) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;
            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;
            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    /* Reattach the (possibly rotated) subtree to its parent or the root. */
    q = p;
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;

      if (p->avl.cache == -1)
      {
        p->avl.left = q;
      }
      else
      {
        p->avl.right = q;
      }
    }
    else
    {
      *root = p;
      break;
    }
  };

  return 0;
}
538
539
/**
 * Removes the node from the tree.
 *
 * The search path is recorded in an explicit stack (direction in each node's
 * avl.cache field, 1 = right, -1 = left). After unlinking the node — or its
 * in-order successor spliced into its place — the path is walked back up and
 * the tree rebalanced with single or double rotations.
 *
 * @param root Pointer to pointer to the root node
 * @param node Pointer to the node to remove
 * @retval 0 Item removed
 * @retval -1 No such item found
 */
static int
rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
                       const rtems_bdbuf_buffer* node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  r;
  rtems_bdbuf_buffer*  s;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  memset (buf_stack, 0, sizeof(buf_stack));

  /* Descend to the node, recording the path and the branch directions. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      p->avl.cache = 1;
      p = p->avl.right;
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      p->avl.cache = -1;
      p = p->avl.left;
    }
    else
    {
      /* node found */
      break;
    }
  }

  if (p == NULL)
  {
    /* there is no such node */
    return -1;
  }

  q = p;

  buf_prev--;
  if (buf_prev > buf_stack)
  {
    p = *(buf_prev - 1);
  }
  else
  {
    p = NULL;
  }

  /* at this moment q - is a node to delete, p is q's parent */
  if (q->avl.right == NULL)
  {
    /* No right child: replace q by its left subtree (height <= 1). */
    r = q->avl.left;
    if (r != NULL)
    {
      r->avl.bal = 0;
    }
    q = r;
  }
  else
  {
    rtems_bdbuf_buffer **t;

    r = q->avl.right;

    if (r->avl.left == NULL)
    {
      /* Right child has no left subtree: splice it straight into q's place. */
      r->avl.left = q->avl.left;
      r->avl.bal = q->avl.bal;
      r->avl.cache = 1;
      *buf_prev++ = q = r;
    }
    else
    {
      /*
       * General case: find the in-order successor s (leftmost node of the
       * right subtree), splice it into q's place and record the traversed
       * nodes so they are rebalanced on the way back up.
       */
      t = buf_prev++;
      s = r;

      while (s->avl.left != NULL)
      {
        *buf_prev++ = r = s;
        s = r->avl.left;
        r->avl.cache = -1;
      }

      s->avl.left = q->avl.left;
      r->avl.left = s->avl.right;
      s->avl.right = q->avl.right;
      s->avl.bal = q->avl.bal;
      s->avl.cache = 1;

      *t = q = s;
    }
  }

  /* Reattach the replacement node to the parent (or make it the root). */
  if (p != NULL)
  {
    if (p->avl.cache == -1)
    {
      p->avl.left = q;
    }
    else
    {
      p->avl.right = q;
    }
  }
  else
  {
    *root = q;
  }

  modified = true;

  /*
   * Walk back up the recorded path rebalancing. Unlike insertion, a removal
   * may require rotations at several levels; the loop only stops early when a
   * subtree's height is unchanged.
   */
  while (modified)
  {
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;
    }
    else
    {
      break;
    }

    if (p->avl.cache == -1)
    {
      /* rebalance left branch */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          break;
        case  0:
          p->avl.bal = 1;
          modified = false;
          break;

        case +1:
          p1 = p->avl.right;

          if (p1->avl.bal >= 0) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;

            if (p1->avl.bal == 0)
            {
              p1->avl.bal = -1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;

            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;

            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }
    else
    {
      /* rebalance right branch */
      switch (p->avl.bal)
      {
        case +1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = -1;
          modified = false;
          break;

        case -1:
          p1 = p->avl.left;

          if (p1->avl.bal <= 0) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            if (p1->avl.bal == 0)
            {
              p1->avl.bal = 1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;

            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;

            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }

    /* Reattach the (possibly rotated) subtree to its parent or the root. */
    if (buf_prev > buf_stack)
    {
      q = *(buf_prev - 1);

      if (q->avl.cache == -1)
      {
        q->avl.left = p;
      }
      else
      {
        q->avl.right = p;
      }
    }
    else
    {
      *root = p;
      break;
    }

  }

  return 0;
}
814
/**
 * Set the state of a buffer descriptor. Single point of state change so
 * transitions can be traced or checked in one place.
 */
static void
rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
{
  bd->state = state;
}
820
[eb649786]821static rtems_blkdev_bnum
[b467782b]822rtems_bdbuf_media_block (const rtems_disk_device *dd, rtems_blkdev_bnum block)
[eb649786]823{
[b467782b]824  if (dd->block_to_media_block_shift >= 0)
825    return block << dd->block_to_media_block_shift;
826  else
827    /*
828     * Change the block number for the block size to the block number for the media
829     * block size. We have to use 64bit maths. There is no short cut here.
830     */
831    return (rtems_blkdev_bnum)
832      ((((uint64_t) block) * dd->block_size) / dd->media_block_size);
[eb649786]833}
834
[3899a537]835/**
[0d15414e]836 * Lock the mutex. A single task can nest calls.
[3899a537]837 *
[0d15414e]838 * @param lock The mutex to lock.
839 * @param fatal_error_code The error code if the call fails.
[3899a537]840 */
[0d15414e]841static void
842rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
[3899a537]843{
[0d15414e]844  rtems_status_code sc = rtems_semaphore_obtain (lock,
845                                                 RTEMS_WAIT,
846                                                 RTEMS_NO_TIMEOUT);
847  if (sc != RTEMS_SUCCESSFUL)
848    rtems_fatal_error_occurred (fatal_error_code);
[3899a537]849}
850
851/**
[0d15414e]852 * Unlock the mutex.
[3899a537]853 *
[0d15414e]854 * @param lock The mutex to unlock.
855 * @param fatal_error_code The error code if the call fails.
[3899a537]856 */
857static void
[0d15414e]858rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
[3899a537]859{
[0d15414e]860  rtems_status_code sc = rtems_semaphore_release (lock);
[3899a537]861  if (sc != RTEMS_SUCCESSFUL)
[0d15414e]862    rtems_fatal_error_occurred (fatal_error_code);
[3899a537]863}
864
/**
 * Lock the cache. A single task can nest calls. Protects all cache data, BDs
 * and lists (see rtems_bdbuf_cache.lock).
 */
static void
rtems_bdbuf_lock_cache (void)
{
  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_LOCK);
}
873
/**
 * Unlock the cache.
 */
static void
rtems_bdbuf_unlock_cache (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BLKDEV_FATAL_BDBUF_CACHE_UNLOCK);
}
882
/**
 * Lock the cache's sync lock. A single task can nest calls. While held, sync
 * calls block writes (see rtems_bdbuf_cache.sync_lock).
 */
static void
rtems_bdbuf_lock_sync (void)
{
  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
}
891
/**
 * Unlock the cache's sync lock. Any blocked writers are woken.
 */
static void
rtems_bdbuf_unlock_sync (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
                      RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
}
901
/**
 * Take a reference on the group of a buffer by incrementing the group user
 * count. Balanced by rtems_bdbuf_group_release().
 */
static void
rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
{
  ++bd->group->users;
}
907
/**
 * Drop a reference on the group of a buffer by decrementing the group user
 * count. Balances rtems_bdbuf_group_obtain().
 */
static void
rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
{
  --bd->group->users;
}
913
[b5b07cad]914static rtems_mode
915rtems_bdbuf_disable_preemption (void)
916{
917  rtems_status_code sc = RTEMS_SUCCESSFUL;
918  rtems_mode prev_mode = 0;
919
920  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
921  if (sc != RTEMS_SUCCESSFUL)
922    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_DIS);
923
924  return prev_mode;
925}
926
927static void
928rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
929{
930  rtems_status_code sc = RTEMS_SUCCESSFUL;
931
932  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
933  if (sc != RTEMS_SUCCESSFUL)
934    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_PREEMPT_RST);
935}
936
/**
 * Wait until woken. Semaphores are used so a number of tasks can wait and can
 * be woken at once. Task events would require we maintain a list of tasks to
 * be woken and this would require storage and we do not know the number of
 * tasks that could be waiting.
 *
 * While we have the cache locked we can try and claim the semaphore and
 * therefore know when we release the lock to the cache we will block until the
 * semaphore is released. This may even happen before we get to block.
 *
 * A counter is used to save the release call when no one is waiting.
 *
 * The function assumes the cache is locked on entry and it will be locked on
 * exit.
 *
 * @param waiters The waiter queue (counter plus semaphore) to block on.
 */
static void
rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
{
  rtems_status_code sc;
  rtems_mode        prev_mode;

  /*
   * Indicate we are waiting.
   */
  ++waiters->count;

  /*
   * Disable preemption then unlock the cache and block.  There is no POSIX
   * condition variable in the core API so this is a work around.
   *
   * The issue is a task could preempt after the cache is unlocked because it is
   * blocking or just hits that window, and before this task has blocked on the
   * semaphore. If the preempting task flushes the queue this task will not see
   * the flush and may block for ever or until another transaction flushes this
   * semaphore.
   */
  prev_mode = rtems_bdbuf_disable_preemption ();

  /*
   * Unlock the cache, wait, and lock the cache when we return.
   */
  rtems_bdbuf_unlock_cache ();

  sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);

  if (sc == RTEMS_TIMEOUT)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_TO);

  /*
   * Waiters are released by a semaphore flush, which returns
   * RTEMS_UNSATISFIED to the blocked tasks; any other status is fatal.
   */
  if (sc != RTEMS_UNSATISFIED)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAIT_2);

  rtems_bdbuf_lock_cache ();

  rtems_bdbuf_restore_preemption (prev_mode);

  --waiters->count;
}
994
/*
 * Wait on the given waiter queue for the buffer @a bd.  The group reference
 * and the per-buffer waiter count are held across the wait; recycle paths
 * check bd->waiters == 0, so this keeps the buffer from being recycled while
 * the task sleeps.
 */
static void
rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
{
  rtems_bdbuf_group_obtain (bd);
  ++bd->waiters;
  rtems_bdbuf_anonymous_wait (waiters);
  --bd->waiters;
  rtems_bdbuf_group_release (bd);
}
1004
[3899a537]1005/**
1006 * Wake a blocked resource. The resource has a counter that lets us know if
1007 * there are any waiters.
1008 */
1009static void
[b5b07cad]1010rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters)
[3899a537]1011{
[b5b07cad]1012  rtems_status_code sc = RTEMS_SUCCESSFUL;
[3899a537]1013
[b5b07cad]1014  if (waiters->count > 0)
1015  {
1016    sc = rtems_semaphore_flush (waiters->sema);
[3899a537]1017    if (sc != RTEMS_SUCCESSFUL)
[0d15414e]1018      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_WAKE);
[3899a537]1019  }
1020}
1021
1022static void
[6d612944]1023rtems_bdbuf_wake_swapper (void)
[3899a537]1024{
[6d612944]1025  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1026                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1027  if (sc != RTEMS_SUCCESSFUL)
1028    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
1029}
1030
1031static bool
1032rtems_bdbuf_has_buffer_waiters (void)
1033{
1034  return bdbuf_cache.buffer_waiters.count;
1035}
1036
/*
 * Remove the buffer from the cache AVL tree.  A removal failure means the
 * tree and the buffer state disagree, which is fatal.
 */
static void
rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
{
  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
    rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_TREE_RM);
}
1043
/*
 * Extract the buffer from the LRU list and, for CACHED buffers, also remove
 * it from the AVL tree.  FREE buffers are only on the LRU list.  Any other
 * state on the LRU list is a fatal inconsistency.
 */
static void
rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
{
  switch (bd->state)
  {
    case RTEMS_BDBUF_STATE_FREE:
      break;
    case RTEMS_BDBUF_STATE_CACHED:
      rtems_bdbuf_remove_from_tree (bd);
      break;
    default:
      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_10);
  }

  rtems_chain_extract_unprotected (&bd->link);
}
1060
/*
 * Mark the buffer FREE and prepend it to the LRU list so it is the first
 * candidate for reuse.
 */
static void
rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
  rtems_chain_prepend_unprotected (&bdbuf_cache.lru, &bd->link);
}
1067
/* Transition the buffer to the EMPTY state. */
static void
rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
}
1073
/*
 * Mark the buffer CACHED and append it to the LRU list, making it the last
 * candidate for reuse.
 */
static void
rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
  rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
}
1080
/*
 * Discard the buffer contents by making it EMPTY.  When no task waits for it
 * the buffer is also removed from the AVL tree and returned to the LRU list
 * as FREE; with waiters present it is left in the tree (presumably so the
 * waiters can still look it up — the waiter paths handle the EMPTY state).
 */
static void
rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_make_empty (bd);

  if (bd->waiters == 0)
  {
    rtems_bdbuf_remove_from_tree (bd);
    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
  }
}
1092
/*
 * Release an accessed buffer onto the modified list.  If a sync is currently
 * active for this buffer's device, first drop the cache lock and block on the
 * sync lock until the sync holder releases it, then re-acquire the cache
 * lock.  Access waiters are woken; otherwise the swapper is kicked when
 * tasks are starving for buffers.
 */
static void
rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
{
  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dd)
  {
    rtems_bdbuf_unlock_cache ();

    /*
     * Wait for the sync lock.
     */
    rtems_bdbuf_lock_sync ();

    rtems_bdbuf_unlock_sync ();
    rtems_bdbuf_lock_cache ();
  }

  /*
   * Only the first modified release sets the timer and any further user
   * accesses do not change the timer value which should move down. This
   * assumes the user's hold of the buffer is much less than the time on the
   * modified list. Resetting the timer on each access which could result in a
   * buffer never getting to 0 and never being forced onto disk. This raises a
   * difficult question. Is a snapshot of a block that is changing better than
   * nothing being written? We have tended to think we should hold changes for
   * only a specific period of time even if still changing and get onto disk
   * and letting the file system try and recover this position if it can.
   */
  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
    bd->hold_timer = bdbuf_config.swap_block_hold;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
  rtems_chain_append_unprotected (&bdbuf_cache.modified, &bd->link);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else if (rtems_bdbuf_has_buffer_waiters ())
    rtems_bdbuf_wake_swapper ();
}
1132
/*
 * Return an accessed buffer to the cache unmodified: drop the group
 * reference and put it back on the LRU list as CACHED.  Waiters for this
 * buffer take precedence over tasks waiting for any free buffer.
 */
static void
rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_group_release (bd);
  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
}
1144
[c21c850e]1145/**
[0d15414e]1146 * Compute the number of BDs per group for a given buffer size.
[e51bd96]1147 *
[0d15414e]1148 * @param size The buffer size. It can be any size and we scale up.
1149 */
1150static size_t
1151rtems_bdbuf_bds_per_group (size_t size)
1152{
1153  size_t bufs_per_size;
1154  size_t bds_per_size;
[18daff9]1155
[6d612944]1156  if (size > bdbuf_config.buffer_max)
[0d15414e]1157    return 0;
[18daff9]1158
[0d15414e]1159  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
[18daff9]1160
[0d15414e]1161  for (bds_per_size = 1;
1162       bds_per_size < bufs_per_size;
1163       bds_per_size <<= 1)
1164    ;
1165
1166  return bdbuf_cache.max_bds_per_group / bds_per_size;
1167}
1168
/*
 * Discard an accessed buffer: drop the group reference and purge the buffer
 * contents.  Waiters for this buffer are preferred over generic buffer
 * waiters when waking.
 */
static void
rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_group_release (bd);
  rtems_bdbuf_discard_buffer (bd);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
}
1180
/**
 * Reallocate a group. The BDs currently allocated in the group are removed
 * from the AVL tree and any lists then the new BD's are prepended to the ready
 * list of the cache.
 *
 * @param group The group to reallocate.
 * @param new_bds_per_group The new count of BDs per group.
 * @return A buffer of this group.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
{
  rtems_bdbuf_buffer* bd;
  size_t              b;
  size_t              bufs_per_bd;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
            group - bdbuf_cache.groups, group->bds_per_group,
            new_bds_per_group);

  /* Stride (in BD slots) between the active BDs of the current layout. */
  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;

  /* Detach every active BD of the old layout from the tree and LRU list. */
  for (b = 0, bd = group->bdbuf;
       b < group->bds_per_group;
       b++, bd += bufs_per_bd)
    rtems_bdbuf_remove_from_tree_and_lru_list (bd);

  group->bds_per_group = new_bds_per_group;
  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;

  /*
   * Free all BDs of the new layout except the first, which is handed back to
   * the caller.
   */
  for (b = 1, bd = group->bdbuf + bufs_per_bd;
       b < group->bds_per_group;
       b++, bd += bufs_per_bd)
    rtems_bdbuf_make_free_and_add_to_lru_list (bd);

  /* Wake buffer waiters only if the loop above actually freed buffers. */
  if (b > 1)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  return group->bdbuf;
}
1222
/*
 * Bind a recycled BD to (dd, block), reset its AVL links and waiter count,
 * insert it into the cache tree and mark it EMPTY.  An insert failure means
 * the block was already in the tree, which is fatal.
 */
static void
rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
                                rtems_disk_device  *dd,
                                rtems_blkdev_bnum   block)
{
  bd->dd        = dd;
  bd->block     = block;
  bd->avl.left  = NULL;
  bd->avl.right = NULL;
  bd->waiters   = 0;

  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE);

  rtems_bdbuf_make_empty (bd);
}
1239
/*
 * Scan the LRU list for a buffer that can be recycled for (dd, block).  A BD
 * with no waiters is taken directly when its group already matches the
 * device's bds_per_group; otherwise, if its group has no users, the whole
 * group is reallocated to the required size.  Returns NULL when no suitable
 * buffer exists (caller must wait and retry).
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_from_lru_list (rtems_disk_device *dd,
                                      rtems_blkdev_bnum  block)
{
  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);

  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
  {
    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
    rtems_bdbuf_buffer *empty_bd = NULL;

    if (rtems_bdbuf_tracer)
      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
              bd - bdbuf_cache.bds,
              bd->group - bdbuf_cache.groups, bd->group->users,
              bd->group->bds_per_group, dd->bds_per_group);

    /*
     * If nobody waits for this BD, we may recycle it.
     */
    if (bd->waiters == 0)
    {
      if (bd->group->bds_per_group == dd->bds_per_group)
      {
        rtems_bdbuf_remove_from_tree_and_lru_list (bd);

        empty_bd = bd;
      }
      else if (bd->group->users == 0)
        empty_bd = rtems_bdbuf_group_realloc (bd->group, dd->bds_per_group);
    }

    if (empty_bd != NULL)
    {
      rtems_bdbuf_setup_empty_buffer (empty_bd, dd, block);

      return empty_bd;
    }

    node = rtems_chain_next (node);
  }

  return NULL;
}
1284
[b6911069]1285static rtems_status_code
1286rtems_bdbuf_create_task(
1287  rtems_name name,
1288  rtems_task_priority priority,
1289  rtems_task_priority default_priority,
1290  rtems_task_entry entry,
1291  rtems_task_argument arg,
1292  rtems_id *id
1293)
1294{
1295  rtems_status_code sc;
1296  size_t stack_size = bdbuf_config.task_stack_size ?
1297    bdbuf_config.task_stack_size : RTEMS_BDBUF_TASK_STACK_SIZE_DEFAULT;
1298
1299  priority = priority != 0 ? priority : default_priority;
1300
1301  sc = rtems_task_create (name,
1302                          priority,
1303                          stack_size,
1304                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1305                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1306                          id);
1307
1308  if (sc == RTEMS_SUCCESSFUL)
1309    sc = rtems_task_start (*id, entry, arg);
1310
1311  return sc;
1312}
1313
/**
 * Initialise the cache.
 *
 * Checks the configuration, claims the initialisation flag with preemption
 * disabled, creates the cache locks and waiter semaphores, allocates the BD,
 * group and buffer memory, links every BD onto the LRU list, and finally
 * creates the swapout task and (optionally) the read-ahead task.  On any
 * failure all resources acquired so far are released.
 *
 * @return rtems_status_code The initialisation status.
 */
rtems_status_code
rtems_bdbuf_init (void)
{
  rtems_bdbuf_group*  group;
  rtems_bdbuf_buffer* bd;
  uint8_t*            buffer;
  size_t              b;
  size_t              cache_aligment;
  rtems_status_code   sc;
  rtems_mode          prev_mode;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:init\n");

  if (rtems_interrupt_is_in_progress())
    return RTEMS_CALLED_FROM_ISR;

  /*
   * Check the configuration table values.
   */
  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
    return RTEMS_INVALID_NUMBER;

  /*
   * We use a special variable to manage the initialisation incase we have
   * completing threads doing this. You may get errors if the another thread
   * makes a call and we have not finished initialisation.
   */
  prev_mode = rtems_bdbuf_disable_preemption ();
  if (bdbuf_cache.initialised)
  {
    rtems_bdbuf_restore_preemption (prev_mode);
    return RTEMS_RESOURCE_IN_USE;
  }

  memset(&bdbuf_cache, 0, sizeof(bdbuf_cache));
  bdbuf_cache.initialised = true;
  rtems_bdbuf_restore_preemption (prev_mode);

  /*
   * For unspecified cache alignments we use the CPU alignment.
   */
  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
  if (cache_aligment <= 0)
    cache_aligment = CPU_ALIGNMENT;

  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;

  rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
  rtems_chain_initialize_empty (&bdbuf_cache.lru);
  rtems_chain_initialize_empty (&bdbuf_cache.modified);
  rtems_chain_initialize_empty (&bdbuf_cache.sync);
  rtems_chain_initialize_empty (&bdbuf_cache.read_ahead_chain);

  /*
   * Create the locks for the cache.
   */
  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
                               &bdbuf_cache.lock);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  /* The cache stays locked until the end of initialisation. */
  rtems_bdbuf_lock_cache ();

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
                               &bdbuf_cache.sync_lock);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.access_waiters.sema);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.transfer_waiters.sema);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'b'),
                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
                               &bdbuf_cache.buffer_waiters.sema);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  /*
   * Compute the various number of elements in the cache.
   */
  bdbuf_cache.buffer_min_count =
    bdbuf_config.size / bdbuf_config.buffer_min;
  bdbuf_cache.max_bds_per_group =
    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
  bdbuf_cache.group_count =
    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;

  /*
   * Allocate the memory for the buffer descriptors.
   */
  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
                            bdbuf_cache.buffer_min_count);
  if (!bdbuf_cache.bds)
    goto error;

  /*
   * Allocate the memory for the buffer groups.
   */
  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
                               bdbuf_cache.group_count);
  if (!bdbuf_cache.groups)
    goto error;

  /*
   * Allocate memory for buffer memory. The buffer memory will be cache
   * aligned. It is possible to free the memory allocated by rtems_memalign()
   * with free(). Return 0 if allocated.
   */
  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
                      cache_aligment,
                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
    goto error;

  /*
   * The cache is empty after opening so we need to add all the buffers to it
   * and initialise the groups.
   */
  for (b = 0, group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
       b < bdbuf_cache.buffer_min_count;
       b++, bd++, buffer += bdbuf_config.buffer_min)
  {
    bd->dd    = BDBUF_INVALID_DEV;
    bd->group  = group;
    bd->buffer = buffer;

    rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);

    /* Advance to the next group after the last BD of the current one. */
    if ((b % bdbuf_cache.max_bds_per_group) ==
        (bdbuf_cache.max_bds_per_group - 1))
      group++;
  }

  for (b = 0,
         group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds;
       b < bdbuf_cache.group_count;
       b++,
         group++,
         bd += bdbuf_cache.max_bds_per_group)
  {
    group->bds_per_group = bdbuf_cache.max_bds_per_group;
    group->bdbuf = bd;
  }

  /*
   * Create and start swapout task. This task will create and manage the worker
   * threads.
   */
  bdbuf_cache.swapout_enabled = true;

  sc = rtems_bdbuf_create_task (rtems_build_name('B', 'S', 'W', 'P'),
                                bdbuf_config.swapout_priority,
                                RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
                                rtems_bdbuf_swapout_task,
                                0,
                                &bdbuf_cache.swapout);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  if (bdbuf_config.max_read_ahead_blocks > 0)
  {
    bdbuf_cache.read_ahead_enabled = true;
    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'R', 'D', 'A'),
                                  bdbuf_config.read_ahead_priority,
                                  RTEMS_BDBUF_READ_AHEAD_TASK_PRIORITY_DEFAULT,
                                  rtems_bdbuf_read_ahead_task,
                                  0,
                                  &bdbuf_cache.read_ahead_task);
    if (sc != RTEMS_SUCCESSFUL)
      goto error;
  }

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;

error:

  /* Unwind everything acquired so far; the cache was zeroed above, so
   * untouched fields are 0/NULL and these calls are safe. */
  if (bdbuf_cache.read_ahead_task != 0)
    rtems_task_delete (bdbuf_cache.read_ahead_task);

  if (bdbuf_cache.swapout != 0)
    rtems_task_delete (bdbuf_cache.swapout);

  free (bdbuf_cache.buffers);
  free (bdbuf_cache.groups);
  free (bdbuf_cache.bds);

  rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema);
  rtems_semaphore_delete (bdbuf_cache.access_waiters.sema);
  rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema);
  rtems_semaphore_delete (bdbuf_cache.sync_lock);

  if (bdbuf_cache.lock != 0)
  {
    rtems_bdbuf_unlock_cache ();
    rtems_semaphore_delete (bdbuf_cache.lock);
  }

  bdbuf_cache.initialised = false;

  return RTEMS_UNSATISFIED;
}
1537
/*
 * Block until exactly the given event set arrives.  Any error or a different
 * event set is fatal.
 */
static void
rtems_bdbuf_wait_for_event (rtems_event_set event)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  rtems_event_set   out = 0;

  sc = rtems_event_receive (event,
                            RTEMS_EVENT_ALL | RTEMS_WAIT,
                            RTEMS_NO_TIMEOUT,
                            &out);

  if (sc != RTEMS_SUCCESSFUL || out != event)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT);
}
1552
[6d612944]1553static void
1554rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
[3d5515b]1555{
[6d612944]1556  while (true)
1557  {
1558    switch (bd->state)
1559    {
1560      case RTEMS_BDBUF_STATE_MODIFIED:
1561        rtems_bdbuf_group_release (bd);
1562        /* Fall through */
1563      case RTEMS_BDBUF_STATE_CACHED:
[89a84c0]1564        rtems_chain_extract_unprotected (&bd->link);
[1027f153]1565        /* Fall through */
1566      case RTEMS_BDBUF_STATE_EMPTY:
[6d612944]1567        return;
[5c587596]1568      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1569      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
[6d612944]1570      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
[e7fb54e]1571      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
[6d612944]1572        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1573        break;
1574      case RTEMS_BDBUF_STATE_SYNC:
[e7fb54e]1575      case RTEMS_BDBUF_STATE_TRANSFER:
1576      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
[6d612944]1577        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1578        break;
1579      default:
1580        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_7);
1581    }
1582  }
1583}
1584
/*
 * Move a modified buffer onto the sync list and wake the swapper so it gets
 * written out.
 */
static void
rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
  rtems_chain_extract_unprotected (&bd->link);
  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
  rtems_bdbuf_wake_swapper ();
}
1593
/**
 * @brief Waits until the buffer is ready for recycling.
 *
 * MODIFIED buffers are pushed onto the sync list first; buffers in access or
 * transfer states are waited on and the state re-checked.
 *
 * @retval @c true Buffer is valid and may be recycled.
 * @retval @c false Buffer is invalid and has to searched again.
 */
static bool
rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_FREE:
        return true;
      case RTEMS_BDBUF_STATE_MODIFIED:
        rtems_bdbuf_request_sync_for_modified_buffer (bd);
        break;
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
        if (bd->waiters == 0)
          return true;
        else
        {
          /*
           * It is essential that we wait here without a special wait count and
           * without the group in use.  Otherwise we could trigger a wait ping
           * pong with another recycle waiter.  The state of the buffer is
           * arbitrary afterwards.
           */
          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
          return false;
        }
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_8);
    }
  }
}
[3d5515b]1643
/*
 * Wait until a sync or transfer of the buffer has finished, i.e. until the
 * buffer has left the SYNC/TRANSFER states.  Any other state returns
 * immediately; an unknown state is fatal.
 */
static void
rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
{
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        return;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_9);
    }
  }
}
[048dcd2b]1669
[6d612944]1670static void
1671rtems_bdbuf_wait_for_buffer (void)
1672{
1673  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
1674    rtems_bdbuf_wake_swapper ();
[18daff9]1675
[6d612944]1676  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1677}
[e51bd96]1678
/*
 * Queue an accessed buffer for synchronous write-out and block until the
 * swapper has finished with it.  Afterwards a recyclable buffer (no waiters,
 * CACHED or EMPTY) triggers a buffer-waiter wake-up; EMPTY buffers are also
 * returned to the free pool.
 */
static void
rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);

  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_wait_for_sync_done (bd);

  /*
   * We may have created a cached or empty buffer which may be recycled.
   */
  if (bd->waiters == 0
        && (bd->state == RTEMS_BDBUF_STATE_CACHED
          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
  {
    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
    {
      rtems_bdbuf_remove_from_tree (bd);
      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
    }
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
  }
}
1707
[6d612944]1708static rtems_bdbuf_buffer *
[7d4a859]1709rtems_bdbuf_get_buffer_for_read_ahead (rtems_disk_device *dd,
1710                                       rtems_blkdev_bnum  block)
[6d612944]1711{
1712  rtems_bdbuf_buffer *bd = NULL;
[18daff9]1713
[796967c]1714  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
[6d612944]1715
1716  if (bd == NULL)
1717  {
[2c6cc3c]1718    bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);
[6d612944]1719
1720    if (bd != NULL)
1721      rtems_bdbuf_group_obtain (bd);
1722  }
1723  else
1724    /*
1725     * The buffer is in the cache.  So it is already available or in use, and
1726     * thus no need for a read ahead.
1727     */
1728    bd = NULL;
[3d5515b]1729
[3899a537]1730  return bd;
[e51bd96]1731}
1732
/*
 * Obtain the buffer for (dd, block) for an access, looping until one is
 * available.  A cache hit whose group size no longer matches the device is
 * recycled (freed and retried); a miss recycles an LRU buffer or waits for
 * one to become free.  The returned buffer is access-ready with its group
 * reference taken.
 */
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_for_access (rtems_disk_device *dd,
                                   rtems_blkdev_bnum  block)
{
  rtems_bdbuf_buffer *bd = NULL;

  do
  {
    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);

    if (bd != NULL)
    {
      if (bd->group->bds_per_group != dd->bds_per_group)
      {
        /* Wrong group geometry: free the buffer and search again. */
        if (rtems_bdbuf_wait_for_recycle (bd))
        {
          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
        }
        bd = NULL;
      }
    }
    else
    {
      bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);

      if (bd == NULL)
        rtems_bdbuf_wait_for_buffer ();
    }
  }
  while (bd == NULL);

  rtems_bdbuf_wait_for_access (bd);
  rtems_bdbuf_group_obtain (bd);

  return bd;
}
1771
1772static rtems_status_code
[b467782b]1773rtems_bdbuf_get_media_block (const rtems_disk_device *dd,
1774                             rtems_blkdev_bnum        block,
1775                             rtems_blkdev_bnum       *media_block_ptr)
[6d612944]1776{
[73c09b3b]1777  rtems_status_code sc = RTEMS_SUCCESSFUL;
1778
1779  if (block < dd->block_count)
1780  {
1781    /*
1782     * Compute the media block number. Drivers work with media block number not
1783     * the block number a BD may have as this depends on the block size set by
1784     * the user.
1785     */
1786    *media_block_ptr = rtems_bdbuf_media_block (dd, block) + dd->start;
1787  }
1788  else
[3899a537]1789  {
[73c09b3b]1790    sc = RTEMS_INVALID_ID;
[3899a537]1791  }
1792
[73c09b3b]1793  return sc;
[6d612944]1794}
1795
/*
 * Get a buffer for the given block without reading it from disk.  The media
 * block is validated first; on success the buffer is obtained for access and
 * moved into the matching ACCESS_* state.  Returns with the cache unlocked
 * and *bd_ptr set (NULL on failure).
 */
rtems_status_code
rtems_bdbuf_get (rtems_disk_device   *dd,
                 rtems_blkdev_bnum    block,
                 rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code   sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block;

  rtems_bdbuf_lock_cache ();

  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    /*
     * Print the block index relative to the physical disk.
     */
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);

    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /*
         * To get a modified buffer could be considered a bug in the caller
         * because you should not be getting an already modified buffer but
         * user may have modified a byte in a block then decided to seek the
         * start and write the whole block and the file system will have no
         * record of this so just gets the block to fill.
         */
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_2);
        break;
    }

    if (rtems_bdbuf_tracer)
    {
      rtems_bdbuf_show_users ("get", bd);
      rtems_bdbuf_show_usage ();
    }
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
1855
[c21c850e]1856/**
1857 * Call back handler called by the low level driver when the transfer has
1858 * completed. This function may be invoked from interrupt handler.
[e51bd96]1859 *
[c21c850e]1860 * @param arg Arbitrary argument specified in block device request
1861 *            structure (in this case - pointer to the appropriate
1862 *            block device request structure).
1863 * @param status I/O completion status
[e51bd96]1864 */
1865static void
[5c587596]1866rtems_bdbuf_transfer_done (void* arg, rtems_status_code status)
[e51bd96]1867{
[3899a537]1868  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
1869
1870  req->status = status;
1871
1872  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
[e51bd96]1873}
1874
static rtems_status_code
rtems_bdbuf_execute_transfer_request (rtems_disk_device    *dd,
                                      rtems_blkdev_request *req,
                                      bool                  cache_locked)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  int result = 0;
  uint32_t transfer_index = 0;
  bool wake_transfer_waiters = false;
  bool wake_buffer_waiters = false;

  /*
   * The driver call may block; never hold the cache lock across the I/O.
   */
  if (cache_locked)
    rtems_bdbuf_unlock_cache ();

  result = dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);

  if (result == 0)
  {
    /*
     * Wait until rtems_bdbuf_transfer_done () signals completion, then pick
     * up the status it stored in the request.
     */
    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
    sc = req->status;
  }
  else
    sc = RTEMS_IO_ERROR;

  rtems_bdbuf_lock_cache ();

  /* Statistics */
  if (req->req == RTEMS_BLKDEV_REQ_READ)
  {
    dd->stats.read_blocks += req->bufnum;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.read_errors;
  }
  else
  {
    dd->stats.write_blocks += req->bufnum;
    ++dd->stats.write_transfers;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.write_errors;
  }

  /*
   * Hand each buffer of the transfer back to the cache and note which waiter
   * groups have to be woken afterwards.
   */
  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
  {
    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
    bool waiters = bd->waiters;

    if (waiters)
      wake_transfer_waiters = true;
    else
      wake_buffer_waiters = true;

    rtems_bdbuf_group_release (bd);

    /*
     * On success a buffer still in the TRANSFER state becomes cached;
     * otherwise it is discarded (its state may have changed while the cache
     * was unlocked during the I/O).
     */
    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
    else
      rtems_bdbuf_discard_buffer (bd);

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("transfer", bd);
  }

  if (wake_transfer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);

  if (wake_buffer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  /* Leave the cache lock in the state the caller expects. */
  if (!cache_locked)
    rtems_bdbuf_unlock_cache ();

  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
    return sc;
  else
    return RTEMS_IO_ERROR;
}
1951
static rtems_status_code
rtems_bdbuf_execute_read_request (rtems_disk_device  *dd,
                                  rtems_bdbuf_buffer *bd,
                                  uint32_t            transfer_count)
{
  rtems_blkdev_request *req = NULL;
  rtems_blkdev_bnum media_block = bd->block;
  uint32_t media_blocks_per_block = dd->media_blocks_per_block;
  uint32_t block_size = dd->block_size;
  uint32_t transfer_index = 1;

  /*
   * TODO: This type of request structure is wrong and should be removed.
   */
#define bdbuf_alloc(size) __builtin_alloca (size)

  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
                     sizeof (rtems_blkdev_sg_buffer) * transfer_count);

  req->req = RTEMS_BLKDEV_REQ_READ;
  req->req_done = rtems_bdbuf_transfer_done;
  req->done_arg = req;
  req->io_task = rtems_task_self ();
  req->status = RTEMS_RESOURCE_IN_USE;
  req->bufnum = 0;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

  /* The requested block is always the first fragment of the request. */
  req->bufs [0].user   = bd;
  req->bufs [0].block  = media_block;
  req->bufs [0].length = block_size;
  req->bufs [0].buffer = bd->buffer;

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_users ("read", bd);

  /*
   * Extend the request with consecutive media blocks for read-ahead while
   * buffers can be obtained. Stop at the first block for which no buffer is
   * available; the request then covers what was gathered so far.
   */
  while (transfer_index < transfer_count)
  {
    media_block += media_blocks_per_block;

    bd = rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

    if (bd == NULL)
      break;

    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

    req->bufs [transfer_index].user   = bd;
    req->bufs [transfer_index].block  = media_block;
    req->bufs [transfer_index].length = block_size;
    req->bufs [transfer_index].buffer = bd->buffer;

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("read", bd);

    ++transfer_index;
  }

  req->bufnum = transfer_index;

  /* The cache is locked on entry; the transfer unlocks it during the I/O. */
  return rtems_bdbuf_execute_transfer_request (dd, req, true);
}
2014
2015static bool
2016rtems_bdbuf_is_read_ahead_active (const rtems_disk_device *dd)
2017{
2018  return !rtems_chain_is_node_off_chain (&dd->read_ahead.node);
2019}
2020
2021static void
2022rtems_bdbuf_read_ahead_cancel (rtems_disk_device *dd)
2023{
2024  if (rtems_bdbuf_is_read_ahead_active (dd))
2025  {
2026    rtems_chain_extract_unprotected (&dd->read_ahead.node);
2027    rtems_chain_set_off_chain (&dd->read_ahead.node);
2028  }
2029}
2030
static void
rtems_bdbuf_read_ahead_reset (rtems_disk_device *dd)
{
  /* Cancel any queued read-ahead request and disarm the trigger block. */
  rtems_bdbuf_read_ahead_cancel (dd);
  dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
}
2037
2038static void
2039rtems_bdbuf_check_read_ahead_trigger (rtems_disk_device *dd,
2040                                      rtems_blkdev_bnum  block)
2041{
2042  if (dd->read_ahead.trigger == block
2043      && !rtems_bdbuf_is_read_ahead_active (dd))
2044  {
2045    rtems_status_code sc;
2046    rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;
2047
2048    rtems_chain_append_unprotected (chain, &dd->read_ahead.node);
2049    sc = rtems_event_send (bdbuf_cache.read_ahead_task,
2050                           RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
2051    if (sc != RTEMS_SUCCESSFUL)
2052      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RA_WAKE_UP);
2053  }
2054}
2055
2056static void
2057rtems_bdbuf_set_read_ahead_trigger (rtems_disk_device *dd,
2058                                    rtems_blkdev_bnum  block)
2059{
2060  if (dd->read_ahead.trigger != block)
2061  {
2062    rtems_bdbuf_read_ahead_cancel (dd);
2063    dd->read_ahead.trigger = block + 1;
2064    dd->read_ahead.next = block + 2;
2065  }
2066}
2067
rtems_status_code
rtems_bdbuf_read (rtems_disk_device   *dd,
                  rtems_blkdev_bnum    block,
                  rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code     sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer   *bd = NULL;
  rtems_blkdev_bnum     media_block;

  rtems_bdbuf_lock_cache ();

  /* Map the logical block number to the media block number. */
  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block + dd->start, block, (unsigned) dd->dev);

    /* A read of the trigger block queues a background read-ahead. */
    rtems_bdbuf_check_read_ahead_trigger (dd, block);
    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        /*
         * Miss: arm the read-ahead trigger and fetch the single block from
         * the media. The transfer unlocks the cache while the I/O runs.
         */
        ++dd->stats.read_misses;
        rtems_bdbuf_set_read_ahead_trigger (dd, block);
        sc = rtems_bdbuf_execute_read_request (dd, bd, 1);
        if (sc == RTEMS_SUCCESSFUL)
        {
          rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
          rtems_chain_extract_unprotected (&bd->link);
          rtems_bdbuf_group_obtain (bd);
        }
        else
        {
          /* The transfer request discarded the buffer on error. */
          bd = NULL;
        }
        break;
      default:
        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_4);
        break;
    }
  }

  rtems_bdbuf_unlock_cache ();

  /* NULL on failure; on success the caller holds the buffer access. */
  *bd_ptr = bd;

  return sc;
}
[e51bd96]2125
[6d612944]2126static rtems_status_code
2127rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
[e51bd96]2128{
[3899a537]2129  if (bd == NULL)
2130    return RTEMS_INVALID_ADDRESS;
[eb649786]2131  if (rtems_bdbuf_tracer)
[3899a537]2132  {
[6d612944]2133    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
2134    rtems_bdbuf_show_users (kind, bd);
[3899a537]2135  }
[6d612944]2136  rtems_bdbuf_lock_cache();
2137
2138  return RTEMS_SUCCESSFUL;
2139}
2140
2141rtems_status_code
2142rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2143{
2144  rtems_status_code sc = RTEMS_SUCCESSFUL;
2145
2146  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2147  if (sc != RTEMS_SUCCESSFUL)
2148    return sc;
[0d15414e]2149
[6d612944]2150  switch (bd->state)
2151  {
[5c587596]2152    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
[6d612944]2153      rtems_bdbuf_add_to_lru_list_after_access (bd);
2154      break;
[5c587596]2155    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
[e7fb54e]2156    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2157      rtems_bdbuf_discard_buffer_after_access (bd);
[5c587596]2158      break;
[6d612944]2159    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2160      rtems_bdbuf_add_to_modified_list_after_access (bd);
2161      break;
2162    default:
2163      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_0);
2164      break;
[3899a537]2165  }
[18daff9]2166
[eb649786]2167  if (rtems_bdbuf_tracer)
2168    rtems_bdbuf_show_usage ();
[18daff9]2169
[0d15414e]2170  rtems_bdbuf_unlock_cache ();
[e51bd96]2171
[3899a537]2172  return RTEMS_SUCCESSFUL;
2173}
[e51bd96]2174
2175rtems_status_code
[6d612944]2176rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
[e51bd96]2177{
[6d612944]2178  rtems_status_code sc = RTEMS_SUCCESSFUL;
[3899a537]2179
[6d612944]2180  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2181  if (sc != RTEMS_SUCCESSFUL)
2182    return sc;
[048dcd2b]2183
[6d612944]2184  switch (bd->state)
2185  {
[5c587596]2186    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2187    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
[6d612944]2188    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2189      rtems_bdbuf_add_to_modified_list_after_access (bd);
2190      break;
[e7fb54e]2191    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2192      rtems_bdbuf_discard_buffer_after_access (bd);
2193      break;
[6d612944]2194    default:
2195      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_6);
2196      break;
2197  }
[18daff9]2198
[eb649786]2199  if (rtems_bdbuf_tracer)
2200    rtems_bdbuf_show_usage ();
[18daff9]2201
[0d15414e]2202  rtems_bdbuf_unlock_cache ();
[048dcd2b]2203
[3899a537]2204  return RTEMS_SUCCESSFUL;
[e51bd96]2205}
2206
2207rtems_status_code
[6d612944]2208rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
[e51bd96]2209{
[6d612944]2210  rtems_status_code sc = RTEMS_SUCCESSFUL;
[6c9b390]2211
[6d612944]2212  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2213  if (sc != RTEMS_SUCCESSFUL)
2214    return sc;
[e51bd96]2215
[6d612944]2216  switch (bd->state)
[3899a537]2217  {
[5c587596]2218    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2219    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
[6d612944]2220    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
[e7fb54e]2221      rtems_bdbuf_sync_after_access (bd);
2222      break;
2223    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2224      rtems_bdbuf_discard_buffer_after_access (bd);
[6d612944]2225      break;
2226    default:
2227      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_5);
2228      break;
2229  }
[18daff9]2230
[6d612944]2231  if (rtems_bdbuf_tracer)
2232    rtems_bdbuf_show_usage ();
[3899a537]2233
[0d15414e]2234  rtems_bdbuf_unlock_cache ();
[18daff9]2235
[3899a537]2236  return RTEMS_SUCCESSFUL;
[e51bd96]2237}
2238
2239rtems_status_code
[40284de]2240rtems_bdbuf_syncdev (rtems_disk_device *dd)
[e51bd96]2241{
[eb649786]2242  if (rtems_bdbuf_tracer)
[796967c]2243    printf ("bdbuf:syncdev: %08x\n", (unsigned) dd->dev);
[e51bd96]2244
[3899a537]2245  /*
[0d15414e]2246   * Take the sync lock before locking the cache. Once we have the sync lock we
2247   * can lock the cache. If another thread has the sync lock it will cause this
2248   * thread to block until it owns the sync lock then it can own the cache. The
2249   * sync lock can only be obtained with the cache unlocked.
[3899a537]2250   */
[0d15414e]2251  rtems_bdbuf_lock_sync ();
[18daff9]2252  rtems_bdbuf_lock_cache ();
[e51bd96]2253
[c21c850e]2254  /*
[0d15414e]2255   * Set the cache to have a sync active for a specific device and let the swap
[c21c850e]2256   * out task know the id of the requester to wake when done.
2257   *
2258   * The swap out task will negate the sync active flag when no more buffers
[0d15414e]2259   * for the device are held on the "modified for sync" queues.
[c21c850e]2260   */
[0d15414e]2261  bdbuf_cache.sync_active    = true;
2262  bdbuf_cache.sync_requester = rtems_task_self ();
[796967c]2263  bdbuf_cache.sync_device    = dd;
[18daff9]2264
[3899a537]2265  rtems_bdbuf_wake_swapper ();
[0d15414e]2266  rtems_bdbuf_unlock_cache ();
[6d612944]2267  rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
[0d15414e]2268  rtems_bdbuf_unlock_sync ();
[6d612944]2269
2270  return RTEMS_SUCCESSFUL;
[e51bd96]2271}
2272
/**
 * Swapout transfer to the driver. The driver will break this I/O into groups
 * of consecutive write requests if multiple consecutive buffers are required
 * by the driver. The cache is not locked.
 *
 * @param transfer The transfer transaction.
 */
static void
rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
{
  rtems_chain_node *node;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dd->dev);

  /*
   * If there are buffers to transfer to the media transfer them.
   */
  if (!rtems_chain_is_empty (&transfer->bds))
  {
    /*
     * The last block number used when the driver only supports
     * continuous blocks in a single request.
     */
    uint32_t last_block = 0;

    rtems_disk_device *dd = transfer->dd;
    uint32_t media_blocks_per_block = dd->media_blocks_per_block;
    bool need_continuous_blocks =
      (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) != 0;

    /*
     * Take as many buffers as configured and pass to the driver. Note, the
     * API to the drivers has an array of buffers and if a chain was passed
     * we could have just passed the list. If the driver API is updated it
     * should be possible to make this change with little effect in this
     * code. The array that is passed is broken in design and should be
     * removed. Merging members of a struct into the first member is
     * trouble waiting to happen.
     */
    transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
    transfer->write_req->bufnum = 0;

    while ((node = rtems_chain_get_unprotected(&transfer->bds)) != NULL)
    {
      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
      bool                write = false;

      /*
       * If the device only accepts sequential buffers and this is not the
       * first buffer (the first is always sequential) and the buffer is not
       * sequential, then put the buffer back on the transfer chain and write
       * the committed buffers.
       */

      if (rtems_bdbuf_tracer)
        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
                bd->block, transfer->write_req->bufnum,
                need_continuous_blocks ? "MULTI" : "SCAT");

      if (need_continuous_blocks && transfer->write_req->bufnum &&
          bd->block != last_block + media_blocks_per_block)
      {
        rtems_chain_prepend_unprotected (&transfer->bds, &bd->link);
        write = true;
      }
      else
      {
        /* Append this buffer to the current scatter/gather request. */
        rtems_blkdev_sg_buffer* buf;
        buf = &transfer->write_req->bufs[transfer->write_req->bufnum];
        transfer->write_req->bufnum++;
        buf->user   = bd;
        buf->block  = bd->block;
        buf->length = dd->block_size;
        buf->buffer = bd->buffer;
        last_block  = bd->block;
      }

      /*
       * Perform the transfer if there are no more buffers, or the transfer
       * size has reached the configured max. value.
       */

      if (rtems_chain_is_empty (&transfer->bds) ||
          (transfer->write_req->bufnum >= bdbuf_config.max_write_blocks))
        write = true;

      if (write)
      {
        rtems_bdbuf_execute_transfer_request (dd, transfer->write_req, false);

        /* Reset the request for the next batch of buffers. */
        transfer->write_req->status = RTEMS_RESOURCE_IN_USE;
        transfer->write_req->bufnum = 0;
      }
    }

    /*
     * If sync'ing and the device is capable of handling a sync IO control
     * call perform the call.
     */
    if (transfer->syncing &&
        (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
    {
      /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
      /* How should the error be handled ? */
    }
  }
}
2381
/**
 * Process the modified list of buffers. There is a sync or modified list that
 * needs to be handled so we have a common function to do the work.
 *
 * @param dd_ptr Pointer to the device to handle. If BDBUF_INVALID_DEV no
 * device is selected so select the device of the first buffer to be written to
 * disk.
 * @param chain The modified chain to process.
 * @param transfer The chain to append buffers to be written to.
 * @param sync_active If true this is a sync operation so expire all timers.
 * @param update_timers If true update the timers.
 * @param timer_delta If update_timers is true update the timers by this
 *                    amount.
 */
static void
rtems_bdbuf_swapout_modified_processing (rtems_disk_device  **dd_ptr,
                                         rtems_chain_control* chain,
                                         rtems_chain_control* transfer,
                                         bool                 sync_active,
                                         bool                 update_timers,
                                         uint32_t             timer_delta)
{
  if (!rtems_chain_is_empty (chain))
  {
    rtems_chain_node* node = rtems_chain_head (chain);
    bool              sync_all;

    node = node->next;

    /*
     * A sync active with no valid dev means sync all.
     */
    if (sync_active && (*dd_ptr == BDBUF_INVALID_DEV))
      sync_all = true;
    else
      sync_all = false;

    while (!rtems_chain_is_tail (chain, node))
    {
      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;

      /*
       * Check if the buffer's hold timer has reached 0. If a sync is active
       * or someone waits for a buffer written force all the timers to 0.
       *
       * @note Lots of sync requests will skew this timer. It should be based
       *       on TOD to be accurate. Does it matter ?
       */
      if (sync_all || (sync_active && (*dd_ptr == bd->dd))
          || rtems_bdbuf_has_buffer_waiters ())
        bd->hold_timer = 0;

      if (bd->hold_timer)
      {
        if (update_timers)
        {
          if (bd->hold_timer > timer_delta)
            bd->hold_timer -= timer_delta;
          else
            bd->hold_timer = 0;
        }

        /* Timer still running: leave the buffer for a later pass. */
        if (bd->hold_timer)
        {
          node = node->next;
          continue;
        }
      }

      /*
       * This assumes we can set it to BDBUF_INVALID_DEV which is just an
       * assumption. Cannot use the transfer list being empty the sync dev
       * calls sets the dev to use.
       */
      if (*dd_ptr == BDBUF_INVALID_DEV)
        *dd_ptr = bd->dd;

      if (bd->dd == *dd_ptr)
      {
        rtems_chain_node* next_node = node->next;
        rtems_chain_node* tnode = rtems_chain_tail (transfer);

        /*
         * The blocks on the transfer list are sorted in block order. This
         * means multi-block transfers for drivers that require consecutive
         * blocks perform better with sorted blocks and for real disks it may
         * help lower head movement.
         */

        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

        rtems_chain_extract_unprotected (node);

        tnode = tnode->previous;

        /*
         * Insertion sort from the tail; most buffers arrive roughly in block
         * order so the scan is usually short. node is set to NULL once the
         * buffer has been inserted.
         */
        while (node && !rtems_chain_is_head (transfer, tnode))
        {
          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;

          if (bd->block > tbd->block)
          {
            rtems_chain_insert_unprotected (tnode, node);
            node = NULL;
          }
          else
            tnode = tnode->previous;
        }

        /* Smallest block so far: it belongs at the head of the transfer. */
        if (node)
          rtems_chain_prepend_unprotected (transfer, node);

        node = next_node;
      }
      else
      {
        node = node->next;
      }
    }
  }
}
2502
/**
 * Process the cache's modified buffers. Check the sync list first then the
 * modified list extracting the buffers suitable to be written to disk. We have
 * a device at a time. The task level loop will repeat this operation while
 * there are buffers to be written. If the transfer fails place the buffers
 * back on the modified list and try again later. The cache is unlocked while
 * the buffers are being written to disk.
 *
 * @param timer_delta If update_timers is true update the timers by this
 *                    amount.
 * @param update_timers If true update the timers.
 * @param transfer The transfer transaction data.
 *
 * @retval true Buffers were written to disk so scan again.
 * @retval false No buffers were written to disk.
 */
static bool
rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
                                bool                          update_timers,
                                rtems_bdbuf_swapout_transfer* transfer)
{
  rtems_bdbuf_swapout_worker* worker;
  bool                        transfered_buffers = false;

  rtems_bdbuf_lock_cache ();

  /*
   * If a sync is active do not use a worker because the current code does not
   * cleaning up after. We need to know the buffers have been written when
   * syncing to release sync lock and currently worker threads do not return to
   * here. We do not know the worker is the last in a sequence of sync writes
   * until after we have it running so we do not know to tell it to release the
   * lock. The simplest solution is to get the main swap out task perform all
   * sync operations.
   */
  if (bdbuf_cache.sync_active)
    worker = NULL;
  else
  {
    worker = (rtems_bdbuf_swapout_worker*)
      rtems_chain_get_unprotected (&bdbuf_cache.swapout_workers);
    if (worker)
      transfer = &worker->transfer;
  }

  rtems_chain_initialize_empty (&transfer->bds);
  transfer->dd = BDBUF_INVALID_DEV;
  transfer->syncing = bdbuf_cache.sync_active;

  /*
   * When the sync is for a device limit the sync to that device. If the sync
   * is for a buffer handle process the devices in the order on the sync
   * list. This means the dev is BDBUF_INVALID_DEV.
   */
  if (bdbuf_cache.sync_active)
    transfer->dd = bdbuf_cache.sync_device;

  /*
   * If we have any buffers in the sync queue move them to the modified
   * list. The first sync buffer will select the device we use.
   */
  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
                                           &bdbuf_cache.sync,
                                           &transfer->bds,
                                           true, false,
                                           timer_delta);

  /*
   * Process the cache's modified list.
   */
  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
                                           &bdbuf_cache.modified,
                                           &transfer->bds,
                                           bdbuf_cache.sync_active,
                                           update_timers,
                                           timer_delta);

  /*
   * We have all the buffers that have been modified for this device so the
   * cache can be unlocked because the state of each buffer has been set to
   * TRANSFER.
   */
  rtems_bdbuf_unlock_cache ();

  /*
   * If there are buffers to transfer to the media transfer them.
   */
  if (!rtems_chain_is_empty (&transfer->bds))
  {
    if (worker)
    {
      /* Hand the prepared transfer to the worker thread. */
      rtems_status_code sc = rtems_event_send (worker->id,
                                               RTEMS_BDBUF_SWAPOUT_SYNC);
      if (sc != RTEMS_SUCCESSFUL)
        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
    }
    else
    {
      rtems_bdbuf_swapout_write (transfer);
    }

    transfered_buffers = true;
  }

  /*
   * A sync with nothing left to write is complete: clear the sync state and
   * wake the task waiting in rtems_bdbuf_syncdev ().
   */
  if (bdbuf_cache.sync_active && !transfered_buffers)
  {
    rtems_id sync_requester;
    rtems_bdbuf_lock_cache ();
    sync_requester = bdbuf_cache.sync_requester;
    bdbuf_cache.sync_active = false;
    bdbuf_cache.sync_requester = 0;
    rtems_bdbuf_unlock_cache ();
    if (sync_requester)
      rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC);
  }

  return transfered_buffers;
}
2621
[3899a537]2622/**
[0d15414e]2623 * Allocate the write request and initialise it for good measure.
[c21c850e]2624 *
[0d15414e]2625 * @return rtems_blkdev_request* The write reference memory.
[e51bd96]2626 */
[0d15414e]2627static rtems_blkdev_request*
2628rtems_bdbuf_swapout_writereq_alloc (void)
[e51bd96]2629{
[3899a537]2630  /*
2631   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
2632   * I am disappointment at finding code like this in RTEMS. The request should
2633   * have been a rtems_chain_control. Simple, fast and less storage as the node
2634   * is already part of the buffer structure.
2635   */
[0d15414e]2636  rtems_blkdev_request* write_req =
[3899a537]2637    malloc (sizeof (rtems_blkdev_request) +
[6d612944]2638            (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer)));
[3899a537]2639
2640  if (!write_req)
2641    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2642
2643  write_req->req = RTEMS_BLKDEV_REQ_WRITE;
[5c587596]2644  write_req->req_done = rtems_bdbuf_transfer_done;
[3899a537]2645  write_req->done_arg = write_req;
2646  write_req->io_task = rtems_task_self ();
2647
[0d15414e]2648  return write_req;
2649}
2650
2651/**
2652 * The swapout worker thread body.
2653 *
2654 * @param arg A pointer to the worker thread's private data.
2655 * @return rtems_task Not used.
2656 */
2657static rtems_task
2658rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2659{
2660  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2661
2662  while (worker->enabled)
2663  {
[6d612944]2664    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
[0d15414e]2665
2666    rtems_bdbuf_swapout_write (&worker->transfer);
2667
2668    rtems_bdbuf_lock_cache ();
2669
2670    rtems_chain_initialize_empty (&worker->transfer.bds);
[796967c]2671    worker->transfer.dd = BDBUF_INVALID_DEV;
[0d15414e]2672
[89a84c0]2673    rtems_chain_append_unprotected (&bdbuf_cache.swapout_workers, &worker->link);
[18daff9]2674
[0d15414e]2675    rtems_bdbuf_unlock_cache ();
2676  }
2677
2678  free (worker->transfer.write_req);
2679  free (worker);
2680
2681  rtems_task_delete (RTEMS_SELF);
2682}
2683
2684/**
2685 * Open the swapout worker threads.
2686 */
2687static void
2688rtems_bdbuf_swapout_workers_open (void)
2689{
2690  rtems_status_code sc;
[e9d50ab]2691  size_t            w;
[18daff9]2692
[0d15414e]2693  rtems_bdbuf_lock_cache ();
[18daff9]2694
[6d612944]2695  for (w = 0; w < bdbuf_config.swapout_workers; w++)
[0d15414e]2696  {
2697    rtems_bdbuf_swapout_worker* worker;
2698
2699    worker = malloc (sizeof (rtems_bdbuf_swapout_worker));
2700    if (!worker)
2701      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2702
[89a84c0]2703    rtems_chain_append_unprotected (&bdbuf_cache.swapout_workers, &worker->link);
[0d15414e]2704    worker->enabled = true;
2705    worker->transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
[18daff9]2706
[0d15414e]2707    rtems_chain_initialize_empty (&worker->transfer.bds);
[796967c]2708    worker->transfer.dd = BDBUF_INVALID_DEV;
[0d15414e]2709
[b6911069]2710    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'D', 'o', 'a' + w),
2711                                  bdbuf_config.swapout_worker_priority,
2712                                  RTEMS_BDBUF_SWAPOUT_WORKER_TASK_PRIORITY_DEFAULT,
2713                                  rtems_bdbuf_swapout_worker_task,
2714                                  (rtems_task_argument) worker,
2715                                  &worker->id);
[0d15414e]2716    if (sc != RTEMS_SUCCESSFUL)
2717      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WK_CREATE);
2718  }
[18daff9]2719
[0d15414e]2720  rtems_bdbuf_unlock_cache ();
2721}
2722
2723/**
2724 * Close the swapout worker threads.
2725 */
2726static void
2727rtems_bdbuf_swapout_workers_close (void)
2728{
2729  rtems_chain_node* node;
[18daff9]2730
[0d15414e]2731  rtems_bdbuf_lock_cache ();
[18daff9]2732
[0d15414e]2733  node = rtems_chain_first (&bdbuf_cache.swapout_workers);
2734  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_workers, node))
2735  {
2736    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2737    worker->enabled = false;
2738    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2739    node = rtems_chain_next (node);
2740  }
[18daff9]2741
[0d15414e]2742  rtems_bdbuf_unlock_cache ();
2743}
2744
/**
 * Body of task which takes care on flushing modified buffers to the disk.
 *
 * @param arg A pointer to the global cache data. Use the global variable and
 *            not this.
 * @return rtems_task Not used.
 */
static rtems_task
rtems_bdbuf_swapout_task (rtems_task_argument arg)
{
  rtems_bdbuf_swapout_transfer transfer;
  uint32_t                     period_in_ticks;
  const uint32_t               period_in_msecs = bdbuf_config.swapout_period;
  uint32_t                     timer_delta;

  /* One transfer descriptor is reused for every swapout pass. */
  transfer.write_req = rtems_bdbuf_swapout_writereq_alloc ();
  rtems_chain_initialize_empty (&transfer.bds);
  transfer.dd = BDBUF_INVALID_DEV;
  transfer.syncing = false;

  /*
   * Localise the period.
   */
  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);

  /*
   * This is temporary. Needs to be changed to use the real time clock.
   */
  timer_delta = period_in_msecs;

  /*
   * Create the worker threads.
   */
  rtems_bdbuf_swapout_workers_open ();

  while (bdbuf_cache.swapout_enabled)
  {
    rtems_event_set   out;
    rtems_status_code sc;

    /*
     * Only update the timers once in the processing cycle.
     */
    bool update_timers = true;

    /*
     * If we write buffers to any disk perform a check again. We only write a
     * single device at a time and the cache may have more than one device's
     * buffers modified waiting to be written.
     */
    bool transfered_buffers;

    do
    {
      transfered_buffers = false;

      /*
       * Extact all the buffers we find for a specific device. The device is
       * the first one we find on a modified list. Process the sync queue of
       * buffers first.
       */
      if (rtems_bdbuf_swapout_processing (timer_delta,
                                          update_timers,
                                          &transfer))
      {
        transfered_buffers = true;
      }

      /*
       * Only update the timers once.
       */
      update_timers = false;
    }
    while (transfered_buffers);

    /*
     * Sleep for the swapout period or until a sync request wakes us early.
     * A timeout is the normal periodic wake-up, so both outcomes are fine.
     */
    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
                              RTEMS_EVENT_ALL | RTEMS_WAIT,
                              period_in_ticks,
                              &out);

    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
  }

  /* Swapout disabled: shut the workers down and release the request. */
  rtems_bdbuf_swapout_workers_close ();

  free (transfer.write_req);

  rtems_task_delete (RTEMS_SELF);
}
[e7fb54e]2835
2836static void
2837rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2838{
2839  bool wake_buffer_waiters = false;
2840  rtems_chain_node *node = NULL;
2841
[89a84c0]2842  while ((node = rtems_chain_get_unprotected (purge_list)) != NULL)
[e7fb54e]2843  {
2844    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
2845
2846    if (bd->waiters == 0)
2847      wake_buffer_waiters = true;
2848
2849    rtems_bdbuf_discard_buffer (bd);
2850  }
2851
2852  if (wake_buffer_waiters)
2853    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2854}
2855
/**
 * Gather all buffers of a disk device for purging.
 *
 * Walks the whole buffer AVL tree iteratively (using an explicit parent
 * stack) and, for every buffer belonging to @a dd, either moves it onto the
 * purge list or marks it so it will be discarded when its current access or
 * transfer finishes. Must be called with the cache locked.
 *
 * @param purge_list Receives the buffers that can be purged immediately.
 * @param dd The disk device whose buffers are purged.
 */
static void
rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
                              const rtems_disk_device *dd)
{
  /* Explicit stack of ancestors; the AVL height bound limits its depth. */
  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer **prev = stack;
  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;

  *prev = NULL;

  while (cur != NULL)
  {
    if (cur->dd == dd)
    {
      switch (cur->state)
      {
        case RTEMS_BDBUF_STATE_FREE:
        case RTEMS_BDBUF_STATE_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
          /* Not in use, or already marked for purge: nothing to do. */
          break;
        case RTEMS_BDBUF_STATE_SYNC:
          /* A sync is pending on this buffer; release the waiters. */
          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
          /* Fall through */
        case RTEMS_BDBUF_STATE_MODIFIED:
          rtems_bdbuf_group_release (cur);
          /* Fall through */
        case RTEMS_BDBUF_STATE_CACHED:
          /* Idle buffer: move it from its current queue to the purge list. */
          rtems_chain_extract_unprotected (&cur->link);
          rtems_chain_append_unprotected (purge_list, &cur->link);
          break;
        case RTEMS_BDBUF_STATE_TRANSFER:
          /* In flight: mark it so it is discarded when the transfer ends. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
          break;
        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
          /* Held by a user: mark it so the release discards it. */
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
          break;
        default:
          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_STATE_11);
      }
    }

    /* Pre-order descent: left child first, then right. */
    if (cur->avl.left != NULL)
    {
      /* Left */
      ++prev;
      *prev = cur;
      cur = cur->avl.left;
    }
    else if (cur->avl.right != NULL)
    {
      /* Right */
      ++prev;
      *prev = cur;
      cur = cur->avl.right;
    }
    else
    {
      /* Leaf: climb until an unvisited right subtree exists. */
      while (*prev != NULL
             && (cur == (*prev)->avl.right || (*prev)->avl.right == NULL))
      {
        /* Up */
        cur = *prev;
        --prev;
      }
      if (*prev != NULL)
        /* Right */
        cur = (*prev)->avl.right;
      else
        /* Finished */
        cur = NULL;
    }
  }
}
2932
[796967c]2933void
[40284de]2934rtems_bdbuf_purge_dev (rtems_disk_device *dd)
[e7fb54e]2935{
2936  rtems_chain_control purge_list;
2937
2938  rtems_chain_initialize_empty (&purge_list);
2939  rtems_bdbuf_lock_cache ();
[39ee704e]2940  rtems_bdbuf_read_ahead_reset (dd);
[796967c]2941  rtems_bdbuf_gather_for_purge (&purge_list, dd);
[e7fb54e]2942  rtems_bdbuf_purge_list (&purge_list);
2943  rtems_bdbuf_unlock_cache ();
2944}
[b467782b]2945
2946rtems_status_code
2947rtems_bdbuf_set_block_size (rtems_disk_device *dd, uint32_t block_size)
2948{
2949  rtems_status_code sc = RTEMS_SUCCESSFUL;
2950
2951  rtems_bdbuf_lock_cache ();
2952
2953  if (block_size > 0)
2954  {
2955    size_t bds_per_group = rtems_bdbuf_bds_per_group (block_size);
2956
2957    if (bds_per_group != 0)
2958    {
2959      int block_to_media_block_shift = 0;
2960      uint32_t media_blocks_per_block = block_size / dd->media_block_size;
2961      uint32_t one = 1;
2962
2963      while ((one << block_to_media_block_shift) < media_blocks_per_block)
2964      {
2965        ++block_to_media_block_shift;
2966      }
2967
2968      if ((dd->media_block_size << block_to_media_block_shift) != block_size)
2969        block_to_media_block_shift = -1;
2970
2971      dd->block_size = block_size;
[73c09b3b]2972      dd->block_count = dd->size / media_blocks_per_block;
2973      dd->media_blocks_per_block = media_blocks_per_block;
[b467782b]2974      dd->block_to_media_block_shift = block_to_media_block_shift;
2975      dd->bds_per_group = bds_per_group;
[39ee704e]2976
2977      rtems_bdbuf_read_ahead_reset (dd);
[b467782b]2978    }
2979    else
2980    {
2981      sc = RTEMS_INVALID_NUMBER;
2982    }
2983  }
2984  else
2985  {
2986    sc = RTEMS_INVALID_NUMBER;
2987  }
2988
2989  rtems_bdbuf_unlock_cache ();
2990
2991  return sc;
2992}
[39ee704e]2993
/**
 * Body of the read-ahead task.
 *
 * Waits for a wake-up event, then services every disk device queued on the
 * read-ahead chain: it starts one read transfer per device beginning at the
 * device's read_ahead.next block and updates the device's next/trigger
 * state for the following read-ahead round.
 *
 * @param arg Not used.
 * @return rtems_task Not used.
 */
static rtems_task
rtems_bdbuf_read_ahead_task (rtems_task_argument arg)
{
  rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;

  while (bdbuf_cache.read_ahead_enabled)
  {
    rtems_chain_node *node;

    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
    rtems_bdbuf_lock_cache ();

    while ((node = rtems_chain_get_unprotected (chain)) != NULL)
    {
      /* The node is embedded in the disk device; recover the device. */
      rtems_disk_device *dd = (rtems_disk_device *)
        ((char *) node - offsetof (rtems_disk_device, read_ahead.node));
      rtems_blkdev_bnum block = dd->read_ahead.next;
      rtems_blkdev_bnum media_block = 0;
      rtems_status_code sc =
        rtems_bdbuf_get_media_block (dd, block, &media_block);

      /* Off-chain marks the device as not queued for read-ahead. */
      rtems_chain_set_off_chain (&dd->read_ahead.node);

      if (sc == RTEMS_SUCCESSFUL)
      {
        rtems_bdbuf_buffer *bd =
          rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

        if (bd != NULL)
        {
          uint32_t transfer_count = dd->block_count - block;
          uint32_t max_transfer_count = bdbuf_config.max_read_ahead_blocks;

          if (transfer_count >= max_transfer_count)
          {
            transfer_count = max_transfer_count;
            /*
             * Re-arm the trigger half-way through this transfer so the next
             * read-ahead starts before the current data is consumed.
             */
            dd->read_ahead.trigger = block + transfer_count / 2;
            dd->read_ahead.next = block + transfer_count;
          }
          else
          {
            /* The rest of the device fits in one transfer: stop here. */
            dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
          }

          ++dd->stats.read_ahead_transfers;
          rtems_bdbuf_execute_read_request (dd, bd, transfer_count);
        }
      }
      else
      {
        /* The block is out of range: disable read-ahead for this device. */
        dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
      }
    }

    rtems_bdbuf_unlock_cache ();
  }

  rtems_task_delete (RTEMS_SELF);
}
[9f527308]3053
3054void rtems_bdbuf_get_device_stats (const rtems_disk_device *dd,
3055                                   rtems_blkdev_stats      *stats)
3056{
3057  rtems_bdbuf_lock_cache ();
3058  *stats = dd->stats;
3059  rtems_bdbuf_unlock_cache ();
3060}
3061
3062void rtems_bdbuf_reset_device_stats (rtems_disk_device *dd)
3063{
3064  rtems_bdbuf_lock_cache ();
3065  memset (&dd->stats, 0, sizeof(dd->stats));
3066  rtems_bdbuf_unlock_cache ();
3067}
Note: See TracBrowser for help on using the repository browser.