Changeset 6d612944 in rtems


Ignore:
Timestamp:
Nov 20, 2009, 6:33:38 AM (10 years ago)
Author:
Thomas Doerfler <Thomas.Doerfler@…>
Branches:
4.10, 4.11, master
Children:
4eee8434
Parents:
b5b51fc
Message:

numerous changes

Location:
cpukit
Files:
4 edited

Legend:

Unmodified
Added
Removed
  • cpukit/ChangeLog

    rb5b51fc r6d612944  
     12009-11-18      Thomas Doerfler <Thomas.Doerfler@embedded-brains.de>
     2
     3        * libblock/include/rtems/blkdev.h, libblock/include/rtems/bdbuf.h,
     4        libblock/src/bdbuf.c: various changes
     5
    162009-11-19      Ralf Corsépius <ralf.corsepius@rtems.org>
    27
  • cpukit/libblock/include/rtems/bdbuf.h

    rb5b51fc r6d612944  
    1414 *    Rewritten to remove score mutex access. Fixes many performance
    1515 *    issues.
    16       Change to support demand driven variable buffer sizes.
     16 *    Change to support demand driven variable buffer sizes.
     17 *
     18 * Copyright (c) 2009 embedded brains GmbH.
    1719 *
    1820 * @(#) bdbuf.h,v 1.9 2005/02/02 00:06:18 joel Exp
     
    4547 *
    4648 * The Block Device Buffer Management implements a cache between the disk
    47  * devices and file systems. The code provides read ahead and write queuing to
    48  * the drivers and fast cache look up using an AVL tree.
     49 * devices and file systems.  The code provides read ahead and write queuing to
     50 * the drivers and fast cache look-up using an AVL tree.
    4951 *
    5052 * The block size used by a file system can be set at runtime and must be a
    51  * multiple of the disk device block size. The disk device's physical block
    52  * size is called the media block size. The file system can set the block size
    53  * it uses to a larger multiple of the media block size. The driver must be
     53 * multiple of the disk device block size.  The disk device's physical block
     54 * size is called the media block size.  The file system can set the block size
     55 * it uses to a larger multiple of the media block size.  The driver must be
    5456 * able to handle buffers sizes larger than one media block.
    5557 *
    5658 * The user configures the amount of memory to be used as buffers in the cache,
    57  * and the minimum and maximum buffer size. The cache will allocate additional
    58  * memory for the buffer descriptors and groups. There are enough buffer
     59 * and the minimum and maximum buffer size.  The cache will allocate additional
     60 * memory for the buffer descriptors and groups.  There are enough buffer
    5961 * descriptors allocated so all the buffer memory can be used as minimum sized
    6062 * buffers.
    6163 *
    62  * The cache is a single pool of buffers. The buffer memory is divided into
     64 * The cache is a single pool of buffers.  The buffer memory is divided into
    6365 * groups where the size of buffer memory allocated to a group is the maximum
    64  * buffer size. A group's memory can be divided down into small buffer sizes
    65  * that are a multiple of 2 of the minimum buffer size. A group is the minumum
    66  * allocation unit for buffers of a specific size. If a buffer of maximum size
    67  * is request the group will have a single buffer. If a buffer of minium size
     66 * buffer size.  A group's memory can be divided down into small buffer sizes
     67 * that are a multiple of 2 of the minimum buffer size.  A group is the minimum
     68 * allocation unit for buffers of a specific size.  If a buffer of maximum size
     69 * is requested the group will have a single buffer.  If a buffer of minimum size
    6870 * is requested the group is divided into minimum sized buffers and the
    69  * remaining buffers are held ready for use. A group keeps track of which
     71 * remaining buffers are held ready for use.  A group keeps track of which
    7072 * buffers are with a file system or driver and groups who have buffer in use
    71  * cannot be realloced. Groups with no buffers in use can be taken and
    72  * realloced to a new size. This is how buffers of different sizes move around
     73 * cannot be realloced.  Groups with no buffers in use can be taken and
     74 * realloced to a new size.  This is how buffers of different sizes move around
    7375 * the cache.
    7476
    75  * The buffers are held in various lists in the cache. All buffers follow this
     77 * The buffers are held in various lists in the cache.  All buffers follow this
    7678 * state machine:
    7779 *                                 
    7880 * @dot
    79  * digraph g {
    80  *   ready [label="Ready\nRead Ahead"];
    81  *   transfer [label="Transfer"];
    82  *   accessed [label="Accessed\nAccessed Modified"];
    83  *   modified [label="Modified\nSynchronized"];
    84  *   cached [label="Cached"];
    85  *   ready -> transfer [label="Read\nRead Ahead"];
    86  *   transfer -> ready [label="Read Ahead Complete"];
    87  *   ready -> accessed [label="Get"];
    88  *   transfer -> accessed [label="Read or Write\nComplete"];
    89  *   transfer -> cached [label="Read or Write\nComplete"];
    90  *   accessed -> cached [label="Release"];
    91  *   cached -> accessed [label="Get"];
    92  *   modified -> accessed [label="Get"];
    93  *   accessed -> modified [label="Modified"];
    94  *   accessed -> transfer [label="Swap"];
     81 * digraph state {
     82 *   e [label="EMPTY",style="filled",fillcolor="aquamarine"];
     83 *   f [label="FRESH",style="filled",fillcolor="seagreen"];
     84 *   c [label="CACHED",style="filled",fillcolor="chartreuse"];
     85 *   a [label="ACCESS",style="filled",fillcolor="royalblue"];
     86 *   am [label="ACCESS MODIFIED",style="filled",fillcolor="royalblue"];
     87 *   t [label="TRANSFER",style="filled",fillcolor="red"];
     88 *   s [label="SYNC",style="filled",fillcolor="red"];
     89 *   m [label="MODIFIED",style="filled",fillcolor="gold"];
     90 *   i [label="INITIAL"];
     91 *   
     92 *   legend_transfer [label="Transfer Wake-Up",fontcolor="red",shape="none"];
     93 *   legend_access [label="Access Wake-Up",fontcolor="royalblue",shape="none"];
     94 *   
     95 *   i -> e [label="Init"];
     96 *   e -> f [label="Buffer Recycle"];
     97 *   f -> a [label="Get"];
     98 *   f -> t [label="Read\nRead Ahead"];
     99 *   c -> e [label="Reallocate\nBlock Size Changed"];
     100 *   c -> a [label="Get\nRead"];
     101 *   c -> f [label="Buffer Recycle"];
     102 *   t -> c [label="Write Transfer Done\nRead Transfer Done\nRead Ahead Transfer Done",color="red",fontcolor="red"];
     103 *   m -> t [label="Swapout"];
     104 *   m -> s [label="Block Size Changed"];
     105 *   m -> am [label="Get\nRead"];
     106 *   a -> m [label="Release Modified",color="royalblue",fontcolor="royalblue"];
     107 *   a -> s [label="Sync",color="royalblue",fontcolor="royalblue"];
     108 *   a -> c [label="Release",color="royalblue",fontcolor="royalblue"];
     109 *   am -> m [label="Release\nRelease Modified",color="royalblue",fontcolor="royalblue"];
     110 *   am -> s [label="Sync",color="royalblue",fontcolor="royalblue"];
     111 *   s -> t [label="Swapout"];
    95112 * }
    96113 * @enddot
    97114 *         
    98  * Empty buffers are added to the ready list and removed from this queue when a
    99  * caller requests a buffer. This is referred to as getting a buffer in the
    100  * code and the event get in the state diagram. The buffer is assigned to a
    101  * block and inserted to the AVL based on the block/device key. If the block is
    102  * to be read by the user and not in the cache (ready) it is transfered from
    103  * the disk into memory. If no ready buffers exist the buffer is taken from the
    104  * LRU list. If no buffers are on the LRU list the modified list is check. If
    105  * no buffers are on the modified list the request blocks. If buffers are on
    106  * the modified list the buffers hold timer is expired and the swap out task
    107  * woken.
    108  *
    109  * A block being accessed is given to the file system layer and not accessable
    110  * to another requester until released back to the cache. The same goes to a
    111  * buffer in the transfer state. The transfer state means being read or
    112  * written. If the file system has modifed the block and releases it as
     115 * Empty or cached buffers are added to the LRU list and removed from this
     116 * queue when a caller requests a buffer.  This is referred to as getting a
     117 * buffer in the code and the event get in the state diagram.  The buffer is
     118 * assigned to a block and inserted to the AVL based on the block/device key.
     119 * If the block is to be read by the user and not in the cache it is transferred
     120 * from the disk into memory.  If no buffers are on the LRU list the modified
     121 * list is checked.  If buffers are on the modified list the swap out task will be
     122 * woken.  The request blocks until a buffer is available for recycle. 
     123 *
     124 * A block being accessed is given to the file system layer and not accessible
     125 * to another requester until released back to the cache.  The same goes to a
     126 * buffer in the transfer state.  The transfer state means being read or
     127 * written.  If the file system has modified the block and releases it as
    113128 * modified it placed on the cache's modified list and a hold timer
    114  * initialised. The buffer is held for the hold time before being written to
    115  * disk. Buffers are held for a configurable period of time on the modified
     129 * initialised.  The buffer is held for the hold time before being written to
     130 * disk.  Buffers are held for a configurable period of time on the modified
    116131 * list as a write sets the state to transfer and this locks the buffer out
    117  * from the file system until the write completes. Buffers are often accessed
     132 * from the file system until the write completes.  Buffers are often accessed
    118133 * and modified in a series of small updates so if sent to the disk when
    119134 * released as modified the user would have to block waiting until it had been
    120  * written. This would be a performance problem.
    121  *
    122  * The code performs mulitple block reads and writes. Multiple block reads or
    123  * read ahead increases performance with hardware that supports it. It also
    124  * helps with a large cache as the disk head movement is reduced. It how-ever
     135 * written.  This would be a performance problem.
     136 *
     137 * The code performs multiple block reads and writes. Multiple block reads or
     138 * read ahead increases performance with hardware that supports it.  It also
     139 * helps with a large cache as the disk head movement is reduced.  It however
    125140 * is a speculative operation so excessive use can remove valuable and needed
    126  * blocks from the cache. The get call knows if a read is a for the file system
    127  * or if it is a read ahead get. If the get is for a read ahead block and the
    128  * block is already in the cache or no ready buffers are available the read
    129  * ahead is stopped. The transfer occurs with the blocks so far. If a buffer is
    130  * in the read ahead state and release it is placed on the ready list rather
    131  * than the LRU list. This means these buffers are used before buffers used by
    132  * the file system.
     141 * blocks from the cache.
    133142 *
    134143 * The cache has the following lists of buffers:
    135  *  - @c ready: Empty buffers created when the pool is initialised.
    136  *  - @c modified: Buffers waiting to be written to disk.
    137  *  - @c sync: Buffers to be synced to disk.
    138  *  - @c lru: Accessed buffers released in least recently used order.
    139  *
    140  * The cache scans the ready list then the LRU list for a suitable buffer in
    141  * this order. A suitable buffer is one that matches the same allocation size
    142  * as the device the buffer is for. The a buffer's group has no buffers in use
    143  * with the file system or driver the group is reallocated. This means the
    144  * buffers in the group are invalidated, resized and placed on the ready queue.
    145  * There is a performance issue with this design. The reallocation of a group
    146  * may forced recently accessed buffers out of the cache when they should
    147  * not. The design should be change to have groups on a LRU list if they have
    148  * no buffers in use.
     144 *  - LRU: Accessed or transferred buffers released in least recently used
     145 *  order.  Empty buffers will be placed to the front.
     146 *  - Modified: Buffers waiting to be written to disk.
     147 *  - Sync: Buffers to be synchronized with the disk.
     148 *
     149 * A cache look-up will be performed to find a suitable buffer.  A suitable
     150 * buffer is one that matches the same allocation size as the device the buffer
     151 * is for.  If a buffer's group has no buffers in use with the file system or
     152 * driver the group is reallocated.  This means the buffers in the group are
     153 * invalidated, resized and placed on the LRU queue.  There is a performance
     154 * issue with this design.  The reallocation of a group may force recently
     155 * accessed buffers out of the cache when they should not.  The design should be
     156 * changed to have groups on a LRU list if they have no buffers in use.
    149157 *
    150158 * @{
     
    152160
    153161/**
    154  * State of a buffer in the cache.
     162 * State of a buffer of the cache.
    155163 */
    156164typedef enum
    157165{
    158   RTEMS_BDBUF_STATE_EMPTY = 0,            /**< Not in use. */
    159   RTEMS_BDBUF_STATE_READ_AHEAD = 1,       /**< Holds read ahead data only */
    160   RTEMS_BDBUF_STATE_CACHED = 2,           /**< In the cache and available */
    161   RTEMS_BDBUF_STATE_ACCESS = 3,           /**< The user has the buffer */
    162   RTEMS_BDBUF_STATE_MODIFIED = 4,         /**< In the cache but modified */
    163   RTEMS_BDBUF_STATE_ACCESS_MODIFIED = 5,  /**< With the user but modified */
    164   RTEMS_BDBUF_STATE_SYNC = 6,             /**< Requested to be sync'ed */
    165   RTEMS_BDBUF_STATE_TRANSFER = 7          /**< Being transferred to or from disk */
     166  /**
     167   * Not in the cache.  Not in a list.  Not in use.
     168   */
     169  RTEMS_BDBUF_STATE_EMPTY = 0,
     170
     171  /**
     172   * In the cache.  Not in a list.  In use by a get or read request.
     173   */
     174  RTEMS_BDBUF_STATE_FRESH,
     175
     176  /**
     177   * In the cache.  In the LRU list.  Not in use.
     178   */
     179  RTEMS_BDBUF_STATE_CACHED,          /**< In the cache and available */
     180
     181  /**
     182   * In the cache.  Not in a list.  In use by an upper layer.
     183   */
     184  RTEMS_BDBUF_STATE_ACCESS,
     185
     186  /**
     187   * In the cache.  Not in a list.  In use by an upper layer.
     188   */
     189  RTEMS_BDBUF_STATE_ACCESS_MODIFIED,
     190
     191  /**
     192   * In the cache.  In the modified list.  Not in use.
     193   */
     194  RTEMS_BDBUF_STATE_MODIFIED,
     195
     196  /**
     197   * In the cache.  In the sync list.  Not in use.
     198   */
     199  RTEMS_BDBUF_STATE_SYNC,
     200
     201  /**
     202   * In the cache.  Not in a list.  In use by the block device driver.
     203   */
     204  RTEMS_BDBUF_STATE_TRANSFER
    166205} rtems_bdbuf_buf_state;
    167206
     
    267306 * than this defined max. This stops thrashing in the cache.
    268307 */
    269 #define RTEMS_BDBUF_MAX_READ_AHEAD_BLOCKS_DEFAULT    32
     308#define RTEMS_BDBUF_MAX_READ_AHEAD_BLOCKS_DEFAULT    0
    270309
    271310/**
  • cpukit/libblock/include/rtems/blkdev.h

    rb5b51fc r6d612944  
    9999 * The block device request structure is used to read or write a number of
    100100 * blocks from or to the device.
     101 *
     102 * TODO: The use of these req blocks is not a great design. The req is a
     103 *       struct with a single 'bufs' declared in the req struct and the
     104 *       others are added in the outer level struct. This relies on the
     105 *       structs joining as a single array and that assumes the compiler
     106 *       packs the structs. Why not just place on a list ? The BD has a
     107 *       node that can be used.
    101108 */
    102109typedef struct rtems_blkdev_request {
  • cpukit/libblock/src/bdbuf.c

    rb5b51fc r6d612944  
    1919 *    Rewritten to remove score mutex access. Fixes many performance
    2020 *    issues.
     21 *
     22 * Copyright (c) 2009 embedded brains GmbH.
    2123 *
    2224 * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp
     
    118120  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
    119121                                          * root. There is only one. */
    120   rtems_chain_control ready;             /**< Free buffers list, read-ahead, or
    121                                           * resized group buffers. */
    122122  rtems_chain_control lru;               /**< Least recently used list */
    123123  rtems_chain_control modified;          /**< Modified buffers list */
     
    143143  (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF))
    144144
    145 #define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_1 RTEMS_BLKDEV_FATAL_ERROR(1)
    146 #define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_2 RTEMS_BLKDEV_FATAL_ERROR(2)
    147 #define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_3 RTEMS_BLKDEV_FATAL_ERROR(3)
    148 #define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_4 RTEMS_BLKDEV_FATAL_ERROR(4)
    149 #define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_5 RTEMS_BLKDEV_FATAL_ERROR(5)
    150 #define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_6 RTEMS_BLKDEV_FATAL_ERROR(6)
    151 #define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_7 RTEMS_BLKDEV_FATAL_ERROR(7)
    152 #define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_8 RTEMS_BLKDEV_FATAL_ERROR(8)
    153 #define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_9 RTEMS_BLKDEV_FATAL_ERROR(9)
     145#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_3      RTEMS_BLKDEV_FATAL_ERROR(1)
     146#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_4      RTEMS_BLKDEV_FATAL_ERROR(2)
     147#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_5      RTEMS_BLKDEV_FATAL_ERROR(3)
     148#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_6      RTEMS_BLKDEV_FATAL_ERROR(4)
     149#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_7      RTEMS_BLKDEV_FATAL_ERROR(5)
     150#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_8      RTEMS_BLKDEV_FATAL_ERROR(6)
     151#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_9      RTEMS_BLKDEV_FATAL_ERROR(7)
     152#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_10      RTEMS_BLKDEV_FATAL_ERROR(8)
     153#define RTEMS_BLKDEV_FATAL_BDBUF_CACHE_RM      RTEMS_BLKDEV_FATAL_ERROR(9)
    154154#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT       RTEMS_BLKDEV_FATAL_ERROR(10)
    155155#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK     RTEMS_BLKDEV_FATAL_ERROR(11)
     
    168168#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE          RTEMS_BLKDEV_FATAL_ERROR(24)
    169169#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS          RTEMS_BLKDEV_FATAL_ERROR(25)
     170#define RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT     RTEMS_BLKDEV_FATAL_ERROR(26)
     171#define RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE       RTEMS_BLKDEV_FATAL_ERROR(27)
     172#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_0       RTEMS_BLKDEV_FATAL_ERROR(28)
     173#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_1       RTEMS_BLKDEV_FATAL_ERROR(29)
     174#define RTEMS_BLKDEV_FATAL_BDBUF_STATE_2       RTEMS_BLKDEV_FATAL_ERROR(30)
     175#define RTEMS_BLKDEV_FATAL_BDBUF_DISK_REL      RTEMS_BLKDEV_FATAL_ERROR(31)
    170176
    171177/**
     
    261267    total += bdbuf_cache.groups[group].users;
    262268  printf ("bdbuf:group users=%lu", total);
    263   val = rtems_bdbuf_list_count (&bdbuf_cache.ready);
    264   printf (", ready=%lu", val);
    265   total = val;
    266269  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
    267270  printf (", lru=%lu", val);
    268   total += val;
     271  total = val;
    269272  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
    270273  printf (", mod=%lu", val);
     
    286289{
    287290  const char* states[] =
    288     { "EM", "RA", "CH", "AC", "MD", "AM", "SY", "TR" };
     291    { "EM", "FR", "CH", "AC", "AM", "MD", "SY", "TR" };
    289292
    290293  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
     
    298301#else
    299302#define rtems_bdbuf_tracer (0)
    300 #define rtems_bdbuf_show_usage()
    301 #define rtems_bdbuf_show_users(_w, _b)
     303#define rtems_bdbuf_show_usage() ((void) 0)
     304#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
    302305#endif
    303306
     
    310313#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
    311314#endif
     315
     316static void
     317rtems_bdbuf_fatal (rtems_bdbuf_buf_state state, uint32_t error)
     318{
     319  rtems_fatal_error_occurred ((((uint32_t) state) << 16) | error);
     320}
    312321
    313322/**
     
    797806}
    798807
     808static void
     809rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
     810{
     811  bd->state = state;
     812}
     813
    799814/**
    800815 * Change the block number for the block size to the block number for the media
     
    811826                         size_t            media_block_size)
    812827{
    813   return (((uint64_t) block) * block_size) / media_block_size;
     828  return (rtems_blkdev_bnum)
     829    ((((uint64_t) block) * block_size) / media_block_size);
    814830}
    815831
     
    881897}
    882898
     899static void
     900rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
     901{
     902  ++bd->group->users;
     903}
     904
     905static void
     906rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
     907{
     908  --bd->group->users;
     909}
     910
    883911static rtems_mode
    884912rtems_bdbuf_disable_preemption (void)
     
    907935 * Wait until woken. Semaphores are used so a number of tasks can wait and can
    908936 * be woken at once. Task events would require we maintain a list of tasks to
    909  * be woken and this would require storgage and we do not know the number of
     937 * be woken and this would require storage and we do not know the number of
    910938 * tasks that could be waiting.
    911939 *
     
    918946 * The function assumes the cache is locked on entry and it will be locked on
    919947 * exit.
    920  *
    921  * @param sema The semaphore to block on and wait.
    922  * @param waiters The wait counter for this semaphore.
    923  */
    924 static void
    925 rtems_bdbuf_wait (rtems_bdbuf_waiters* waiters)
     948 */
     949static void
     950rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
    926951{
    927952  rtems_status_code sc;
     
    965990}
    966991
     992static void
     993rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
     994{
     995  rtems_bdbuf_group_obtain (bd);
     996  ++bd->waiters;
     997  rtems_bdbuf_anonymous_wait (waiters);
     998  --bd->waiters;
     999  rtems_bdbuf_group_release (bd);
     1000}
     1001
    9671002/**
    9681003 * Wake a blocked resource. The resource has a counter that lets us know if
     
    9821017}
    9831018
    984 /**
    985  * Add a buffer descriptor to the modified list. This modified list is treated
    986  * a litte differently to the other lists. To access it you must have the cache
    987  * locked and this is assumed to be the case on entry to this call.
    988  *
    989  * If the cache has a device being sync'ed and the bd is for that device the
    990  * call must block and wait until the sync is over before adding the bd to the
    991  * modified list. Once a sync happens for a device no bd's can be added the
    992  * modified list. The disk image is forced to be snapshot at that moment in
    993  * time.
    994  *
    995  * @note Do not lower the group user count as the modified list is a user of
    996  * the buffer.
    997  *
    998  * @param bd The bd to queue to the cache's modified list.
    999  */
    1000 static void
    1001 rtems_bdbuf_append_modified (rtems_bdbuf_buffer* bd)
    1002 {
    1003   /*
    1004    * If the cache has a device being sync'ed check if this bd is for that
    1005    * device. If it is unlock the cache and block on the sync lock. Once we have
    1006    * the sync lock release it.
    1007    */
    1008   if (bdbuf_cache.sync_active && (bdbuf_cache.sync_device == bd->dev))
    1009   {
    1010     rtems_bdbuf_unlock_cache ();
    1011     /* Wait for the sync lock */
    1012     rtems_bdbuf_lock_sync ();
    1013     rtems_bdbuf_unlock_sync ();
    1014     rtems_bdbuf_lock_cache ();
    1015   }
    1016      
    1017   bd->state = RTEMS_BDBUF_STATE_MODIFIED;
    1018 
    1019   rtems_chain_append (&bdbuf_cache.modified, &bd->link);
    1020 }
    1021 
    1022 /**
    1023  * Wait the swapper task.
    1024  */
    10251019static void
    10261020rtems_bdbuf_wake_swapper (void)
     
    10321026}
    10331027
     1028static bool
     1029rtems_bdbuf_has_buffer_waiters (void)
     1030{
     1031  return bdbuf_cache.buffer_waiters.count;
     1032}
     1033
     1034static void
     1035rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
     1036{
     1037  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dev)
     1038  {
     1039    rtems_bdbuf_unlock_cache ();
     1040
     1041    /*
     1042     * Wait for the sync lock.
     1043     */
     1044    rtems_bdbuf_lock_sync ();
     1045
     1046    rtems_bdbuf_unlock_sync ();
     1047    rtems_bdbuf_lock_cache ();
     1048  }
     1049
     1050  /*
     1051   * Only the first modified release sets the timer and any further user
     1052   * accesses do not change the timer value which should move down. This
     1053   * assumes the user's hold of the buffer is much less than the time on the
     1054   * modified list. Resetting the timer on each access which could result in a
     1055   * buffer never getting to 0 and never being forced onto disk. This raises a
     1056   * difficult question. Is a snapshot of a block that is changing better than
     1057   * nothing being written ? We have tended to think we should hold changes for
     1058   * only a specific period of time even if still changing and get onto disk
     1059   * and letting the file system try and recover this position if it can.
     1060   */
     1061  if (bd->state == RTEMS_BDBUF_STATE_ACCESS)
     1062    bd->hold_timer = bdbuf_config.swap_block_hold;
     1063     
     1064  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
     1065
     1066  rtems_chain_append (&bdbuf_cache.modified, &bd->link);
     1067
     1068  if (bd->waiters)
     1069    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
     1070  else if (rtems_bdbuf_has_buffer_waiters ())
     1071    rtems_bdbuf_wake_swapper ();
     1072}
     1073
     1074static void
     1075rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
     1076{
     1077  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
     1078
     1079  rtems_chain_append (&bdbuf_cache.lru, &bd->link);
     1080
     1081  rtems_bdbuf_group_release (bd);
     1082
     1083  if (bd->waiters)
     1084    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
     1085  else
     1086    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
     1087}
     1088
     1089static void
     1090rtems_bdbuf_add_to_sync_list_after_access (rtems_bdbuf_buffer *bd)
     1091{
     1092  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
     1093
     1094  rtems_chain_append (&bdbuf_cache.sync, &bd->link);
     1095
     1096  if (bd->waiters)
     1097    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
     1098}
     1099
    10341100/**
    10351101 * Compute the number of BDs per group for a given buffer size.
     
    10431109  size_t bds_per_size;
    10441110 
    1045   if (size > rtems_bdbuf_configuration.buffer_max)
     1111  if (size > bdbuf_config.buffer_max)
    10461112    return 0;
    10471113 
     
    10561122}
    10571123
     1124static void
     1125rtems_bdbuf_remove_from_cache_and_lru_list (rtems_bdbuf_buffer *bd)
     1126{
     1127  switch (bd->state)
     1128  {
     1129    case RTEMS_BDBUF_STATE_EMPTY:
     1130      break;
     1131    case RTEMS_BDBUF_STATE_CACHED:
     1132      if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
     1133        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_3);
     1134      break;
     1135    default:
     1136      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_10);
     1137  }
     1138 
     1139  rtems_chain_extract (&bd->link);
     1140}
     1141
     1142static void
     1143rtems_bdbuf_make_empty_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
     1144{
     1145  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
     1146  rtems_chain_prepend (&bdbuf_cache.lru, &bd->link);
     1147}
     1148
    10581149/**
    10591150 * Reallocate a group. The BDs currently allocated in the group are removed
     
    10631154 * @param group The group to reallocate.
    10641155 * @param new_bds_per_group The new count of BDs per group.
    1065  */
    1066 static void
     1156 * @return A buffer of this group.
     1157 */
     1158static rtems_bdbuf_buffer *
    10671159rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
    10681160{
     
    10811173       b < group->bds_per_group;
    10821174       b++, bd += bufs_per_bd)
    1083   {
    1084     switch (bd->state)
    1085     {
    1086       case RTEMS_BDBUF_STATE_EMPTY:
    1087         break;
    1088       case RTEMS_BDBUF_STATE_CACHED:
    1089       case RTEMS_BDBUF_STATE_READ_AHEAD:
    1090         if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
    1091           rtems_fatal_error_occurred ((((uint32_t) bd->state) << 16) |
    1092                                       RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_1);
    1093         break;
    1094       default:
    1095         rtems_fatal_error_occurred ((((uint32_t) bd->state) << 16) |
    1096                                     RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_8);
    1097     }
    1098    
    1099     rtems_chain_extract (&bd->link);
    1100   }
     1175    rtems_bdbuf_remove_from_cache_and_lru_list (bd);
    11011176 
    11021177  group->bds_per_group = new_bds_per_group;
    11031178  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
    11041179 
    1105   for (b = 0, bd = group->bdbuf;
     1180  for (b = 1, bd = group->bdbuf + bufs_per_bd;
    11061181       b < group->bds_per_group;
    11071182       b++, bd += bufs_per_bd)
    1108   {
    1109     bd->state = RTEMS_BDBUF_STATE_EMPTY;
    1110     rtems_chain_prepend (&bdbuf_cache.ready, &bd->link);
    1111   }
    1112 }
    1113 
    1114 /**
    1115  * Get the next BD from the list. This call assumes the cache is locked.
    1116  *
    1117  * @param bds_per_group The number of BDs per block we are need.
    1118  * @param list The list to find the BD on.
    1119  * @return The next BD if found or NULL is none are available.
    1120  */
    1121 static rtems_bdbuf_buffer*
    1122 rtems_bdbuf_get_next_bd (size_t               bds_per_group,
    1123                          rtems_chain_control* list)
    1124 {
    1125   rtems_chain_node* node = rtems_chain_first (list);
    1126   while (!rtems_chain_is_tail (list, node))
    1127   {
    1128     rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
     1183    rtems_bdbuf_make_empty_and_add_to_lru_list (bd);
     1184
     1185  if (b > 1)
     1186    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
     1187
     1188  return group->bdbuf;
     1189}
     1190
     /**
      * @brief Re-initialises a buffer descriptor for a new device/block key and
      * inserts it into the cache AVL tree.
      *
      * The state is set to RTEMS_BDBUF_STATE_FRESH via rtems_bdbuf_set_state(),
      * the device and block are assigned, and the AVL child links, the error
      * field and the waiter count are cleared before the insert.  A non-zero
      * result from rtems_bdbuf_avl_insert() raises the fatal error
      * RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE.
      *
      * @param bd The buffer descriptor to recycle.
      * @param dev The device the buffer is assigned to.
      * @param block The block number the buffer is assigned to.
      */
     1191static void
     1192rtems_bdbuf_recycle_buffer (rtems_bdbuf_buffer *bd,
     1193                            dev_t               dev,
     1194                            rtems_blkdev_bnum   block)
     1195{
     1196  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FRESH);
     1197
     1198  bd->dev       = dev;
     1199  bd->block     = block;
     1200  bd->avl.left  = NULL;
     1201  bd->avl.right = NULL;
     1202  bd->error     = 0;
     1203  bd->waiters   = 0;
     1204
     1205  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
     1206    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_RECYCLE);
     1207}
     1208
     1209static rtems_bdbuf_buffer *
     1210rtems_bdbuf_get_buffer_from_lru_list (dev_t             dev,
     1211                                      rtems_blkdev_bnum block,
     1212                                      size_t            bds_per_group)
     1213{
     1214  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);
     1215
     1216  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
     1217  {
     1218    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
     1219    rtems_bdbuf_buffer *recycle_bd = NULL;
    11291220
    11301221    if (rtems_bdbuf_tracer)
     
    11391230    if (bd->waiters == 0)
    11401231    {
    1141       /*
    1142        * If this bd is already part of a group that supports the same number of
    1143        * BDs per group return it. If the bd is part of another group check the
    1144        * number of users and if 0 we can take this group and resize it.
    1145        */
    11461232      if (bd->group->bds_per_group == bds_per_group)
    11471233      {
    1148         rtems_chain_extract (node);
    1149         return bd;
     1234        rtems_bdbuf_remove_from_cache_and_lru_list (bd);
     1235
     1236        recycle_bd = bd;
    11501237      }
    1151 
    1152       if (bd->group->users == 0)
    1153       {
    1154         /*
    1155          * We use the group to locate the start of the BDs for this group.
    1156          */
    1157         rtems_bdbuf_group_realloc (bd->group, bds_per_group);
    1158         bd = (rtems_bdbuf_buffer*) rtems_chain_get (&bdbuf_cache.ready);
    1159         return bd;
    1160       }
     1238      else if (bd->group->users == 0)
     1239        recycle_bd = rtems_bdbuf_group_realloc (bd->group, bds_per_group);
     1240    }
     1241
     1242    if (recycle_bd != NULL)
     1243    {
     1244      rtems_bdbuf_recycle_buffer (recycle_bd, dev, block);
     1245
     1246      return recycle_bd;
    11611247    }
    11621248
    11631249    node = rtems_chain_next (node);
    11641250  }
    1165  
     1251
    11661252  return NULL;
    11671253}
     
    12211307
    12221308  rtems_chain_initialize_empty (&bdbuf_cache.swapout_workers);
    1223   rtems_chain_initialize_empty (&bdbuf_cache.ready);
    12241309  rtems_chain_initialize_empty (&bdbuf_cache.lru);
    12251310  rtems_chain_initialize_empty (&bdbuf_cache.modified);
     
    12551340    goto error;
    12561341
    1257   sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'w'),
     1342  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'b'),
    12581343                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
    12591344                               &bdbuf_cache.buffer_waiters.sema);
     
    13081393       b++, bd++, buffer += bdbuf_config.buffer_min)
    13091394  {
    1310     bd->dev        = BDBUF_INVALID_DEV;
    1311     bd->group      = group;
    1312     bd->buffer     = buffer;
    1313     bd->avl.left   = NULL;
    1314     bd->avl.right  = NULL;
    1315     bd->state      = RTEMS_BDBUF_STATE_EMPTY;
    1316     bd->error      = 0;
    1317     bd->waiters    = 0;
    1318     bd->hold_timer = 0;
    1319     bd->references = 0;
    1320     bd->user       = NULL;
     1395    bd->dev    = BDBUF_INVALID_DEV;
     1396    bd->group  = group;
     1397    bd->buffer = buffer;
    13211398   
    1322     rtems_chain_append (&bdbuf_cache.ready, &bd->link);
     1399    rtems_chain_append (&bdbuf_cache.lru, &bd->link);
    13231400
    13241401    if ((b % bdbuf_cache.max_bds_per_group) ==
     
    13361413  {
    13371414    group->bds_per_group = bdbuf_cache.max_bds_per_group;
    1338     group->users = 0;
    13391415    group->bdbuf = bd;
    13401416  }
     
    13471423 
    13481424  sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'),
    1349                           (bdbuf_config.swapout_priority ?
    1350                            bdbuf_config.swapout_priority :
    1351                            RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
     1425                          bdbuf_config.swapout_priority ?
     1426                            bdbuf_config.swapout_priority :
     1427                            RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
    13521428                          SWAPOUT_TASK_STACK_SIZE,
    13531429                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
     
    13921468}
    13931469
    /**
     * Increments the user count of the group the buffer descriptor belongs to.
     *
     * @param bd The buffer descriptor whose group gains a user.
     */
    1394 static inline void
    1395 rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
    1396 {
    1397   ++bd->group->users;
    1398 }
    1399 
    /**
     * Decrements the user count of the group the buffer descriptor belongs to.
     *
     * @param bd The buffer descriptor whose group loses a user.
     */
    1400 static inline void
    1401 rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
    1402 {
    1403   --bd->group->users;
    1404 }
    1405 
    1406 /**
    1407  * Get a buffer for this device and block. This function returns a buffer once
    1408  * placed into the AVL tree. If no buffer is available and it is not a read
    1409  * ahead request and no buffers are waiting to the written to disk wait until a
    1410  * buffer is available. If buffers are waiting to be written to disk and none
    1411  * are available expire the hold timer's of the queued buffers and wake the
    1412  * swap out task. If the buffer is for a read ahead transfer return NULL if
    1413  * there are no buffers available or the buffer is already in the cache.
    1414  *
    1415  * The AVL tree of buffers for the cache is searched and if not found obtain a
    1416  * buffer and insert it into the AVL tree. Buffers are first obtained from the
    1417  * ready list until all empty/ready buffers are used. Once all buffers are in
    1418  * use the LRU list is searched for a buffer of the same group size or a group
    1419  * that has no active buffers in use. A buffer taken from the LRU list is
    1420  * removed from the AVL tree and assigned the new block number. The ready or
    1421  * LRU list buffer is initialised to this device and block. If no buffers are
    1422  * available due to the ready and LRU lists being empty a check is made of the
    1423  * modified list. Buffers may be queued waiting for the hold timer to
    1424  * expire. These buffers should be written to disk and returned to the LRU list
    1425  * where they can be used. If buffers are on the modified list the max. write
    1426  * block size of buffers have their hold timer's expired and the swap out task
    1427  * woken. The caller then blocks on the waiting semaphore and counter. When
    1428  * buffers return from the upper layers (access) or lower driver (transfer) the
    1429  * blocked caller task is woken and this procedure is repeated. The repeat
    1430  * handles a case of a another thread pre-empting getting a buffer first and
    1431  * adding it to the AVL tree.
    1432  *
    1433  * A buffer located in the AVL tree means it is already in the cache and maybe
    1434  * in use somewhere. The buffer can be either:
    1435  *
    1436  * # Cached. Not being accessed or part of a media transfer.
    1437  * # Access or modifed access. Is with an upper layer being accessed.
    1438  * # Transfer. Is with the driver and part of a media transfer.
    1439  *
    1440  * If cached we assign the new state, extract it from any list it maybe part of
    1441  * and return to the user.
    1442  *
    1443  * This function assumes the cache the buffer is being taken from is locked and
    1444  * it will make sure the cache is locked when it returns. The cache will be
    1445  * unlocked if the call could block.
    1446  *
    1447  * Variable sized buffer is handled by groups. A group is the size of the
    1448  * maximum buffer that can be allocated. The group can size in multiples of the
    1449  * minimum buffer size where the mulitples are 1,2,4,8, etc. If the buffer is
    1450  * found in the AVL tree the number of BDs in the group is check and if
    1451  * different the buffer size for the block has changed. The buffer needs to be
    1452  * invalidated.
    1453  *
    1454  * The returned buffer will be a user its group.
    1455  *
    1456  * @param dd The disk device. Has the configured block size.
    1457  * @param bds_per_group The number of BDs in a group for this block.
    1458  * @param block Absolute media block number for the device
    1459  * @param read_ahead The get is for a read ahead buffer if true
    1460  * @return RTEMS status code (if operation completed successfully or error
    1461  *         code if error is occured)
    1462  */
    1463 static rtems_bdbuf_buffer*
    1464 rtems_bdbuf_get_buffer (rtems_disk_device* dd,
    1465                         size_t             bds_per_group,
    1466                         rtems_blkdev_bnum  block,
    1467                         bool               read_ahead)
    1468 {
    1469   dev_t               device = dd->dev;
    1470   rtems_bdbuf_buffer* bd;
    1471   bool                available;
    1472  
    1473   /*
    1474    * Loop until we get a buffer. Under load we could find no buffers are
    1475    * available requiring this task to wait until some become available before
    1476    * proceeding. There is no timeout. If this call is to block and the buffer
    1477    * is for a read ahead buffer return NULL. The read ahead is nice but not
    1478    * that important.
    1479    *
    1480    * The search procedure is repeated as another thread could have pre-empted
    1481    * us while we waited for a buffer, obtained an empty buffer and loaded the
    1482    * AVL tree with the one we are after. In this case we move down and wait for
    1483    * the buffer to return to the cache.
    1484    */
    1485   do
    1486   {
    1487     /*
    1488      * Search for buffer descriptor for this dev/block key.
    1489      */
    1490     bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, device, block);
    1491 
    1492     /*
    1493      * No buffer in the cache for this block. We need to obtain a buffer and
    1494      * this means take a buffer that is ready to use. If all buffers are in use
    1495      * take the least recently used buffer. If there are none then the cache is
    1496      * empty. All the buffers are either queued to be written to disk or with
    1497      * the user. We cannot do much with the buffers with the user how-ever with
    1498      * the modified buffers waiting to be written to disk flush the maximum
    1499      * number transfered in a block to disk. After this all that can be done is
    1500      * to wait for a buffer to return to the cache.
    1501      */
    1502     if (!bd)
    1503     {
    1504       /*
    1505        * Assign new buffer descriptor from the ready list if one is present. If
    1506        * the ready queue is empty get the oldest buffer from LRU list. If the
    1507        * LRU list is empty there are no available buffers check the modified
    1508        * list.
    1509        */
    1510       bd = rtems_bdbuf_get_next_bd (bds_per_group, &bdbuf_cache.ready);
    1511 
    1512       if (!bd)
    1513       {
    1514         /*
    1515          * No unused or read-ahead buffers.
    1516          *
    1517          * If this is a read ahead buffer just return. No need to place further
    1518          * pressure on the cache by reading something that may be needed when
    1519          * we have data in the cache that was needed and may still be in the
    1520          * future.
    1521          */
    1522         if (read_ahead)
    1523           return NULL;
    1524 
    1525         /*
    1526          * Check the LRU list.
    1527          */
    1528         bd = rtems_bdbuf_get_next_bd (bds_per_group, &bdbuf_cache.lru);
    1529        
    1530         if (bd)
    1531         {
    1532           /*
    1533            * Remove the buffer from the AVL tree if the state says it is in the
    1534            * cache or a read ahead buffer. The buffer could be in the empty
    1535            * state as a result of reallocations.
    1536            */
    1537           switch (bd->state)
    1538           {
    1539             case RTEMS_BDBUF_STATE_CACHED:
    1540             case RTEMS_BDBUF_STATE_READ_AHEAD:
    1541               if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
    1542                 rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_2);
    1543               break;
    1544             default:
    1545               break;
    1546           }
    1547         }
     /**
      * @brief Blocks the calling task until the given event set is received.
      *
      * Calls rtems_event_receive() with RTEMS_EVENT_ALL | RTEMS_WAIT and
      * RTEMS_NO_TIMEOUT.  If the call does not return RTEMS_SUCCESSFUL, or the
      * received event set differs from the requested one, the fatal error
      * RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT is raised.
      *
      * @param event The event set to wait for.
      */
     1470static void
     1471rtems_bdbuf_wait_for_event (rtems_event_set event)
     1472{
     1473  rtems_status_code sc = RTEMS_SUCCESSFUL;
     1474  rtems_event_set   out = 0;
     1475 
     1476  sc = rtems_event_receive (event,
     1477                            RTEMS_EVENT_ALL | RTEMS_WAIT,
     1478                            RTEMS_NO_TIMEOUT,
     1479                            &out);
     1480
     1481  if (sc != RTEMS_SUCCESSFUL || out != event)
     1482    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_WAIT_EVNT);
     1483}
     1484
     /**
      * @brief Loops on the buffer state until the buffer can be handed out.
      *
      * The state is re-examined after every wait:
      *
      * - FRESH: returns immediately.
      * - MODIFIED: calls rtems_bdbuf_group_release() for the buffer, then
      *   continues as for CACHED.
      * - CACHED: extracts the buffer's chain node from the chain it is on and
      *   returns.
      * - ACCESS, ACCESS_MODIFIED: calls rtems_bdbuf_wait() with the access
      *   waiters.
      * - TRANSFER, SYNC: calls rtems_bdbuf_wait() with the transfer waiters.
      * - Any other state: rtems_bdbuf_fatal() is called with
      *   RTEMS_BLKDEV_FATAL_BDBUF_STATE_7.
      *
      * @param bd The buffer descriptor to wait for.
      */
     1485static void
     1486rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
     1487{
     1488  while (true)
     1489  {
     1490    switch (bd->state)
     1491    {
     1492      case RTEMS_BDBUF_STATE_FRESH:
     1493        return;
     1494      case RTEMS_BDBUF_STATE_MODIFIED:
     1495        rtems_bdbuf_group_release (bd);
     1496        /* Fall through */
     1497      case RTEMS_BDBUF_STATE_CACHED:
     1498        rtems_chain_extract (&bd->link);
     1499        return;
     1500      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
     1501      case RTEMS_BDBUF_STATE_ACCESS:
     1502        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
     1503        break;
     1504      case RTEMS_BDBUF_STATE_TRANSFER:
     1505      case RTEMS_BDBUF_STATE_SYNC:
     1506        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
     1507        break;
     1508      default:
     1509        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_7);
     1510    }
     1511  }
     1512}
     1513
     /**
      * @brief Queues a buffer on the cache sync chain and wakes the swapper.
      *
      * The state is set to RTEMS_BDBUF_STATE_SYNC, the buffer's chain node is
      * extracted from the chain it is currently on and appended to
      * bdbuf_cache.sync, and rtems_bdbuf_wake_swapper() is called.
      *
      * @param bd The buffer descriptor to queue for synchronisation.
      */
     1514static void
     1515rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
     1516{
     1517  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
     1518  rtems_chain_extract (&bd->link);
     1519  rtems_chain_append (&bdbuf_cache.sync, &bd->link);
     1520  rtems_bdbuf_wake_swapper ();
     1521}
     1522
     1523/**
     1524 * @brief Waits until the buffer is ready for recycling.
     1525 *
     1526 * @retval @c true Buffer is valid and may be recycled.
     1527 * @retval @c false Buffer is invalid and has to be searched again.
     1528 */
     1529static bool
     1530rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
     1531{
     1532  while (true)
     1533  {
     1534    switch (bd->state)
     1535    {
     1536      case RTEMS_BDBUF_STATE_EMPTY:
     1537        return true;
     1538      case RTEMS_BDBUF_STATE_MODIFIED:
     1539        rtems_bdbuf_request_sync_for_modified_buffer (bd);
     1540        break;
     1541      case RTEMS_BDBUF_STATE_CACHED:
     1542        if (bd->waiters == 0)
     1543          return true;
    15481544        else
    15491545        {
    15501546          /*
    1551            * If there are buffers on the modified list expire the hold timer
    1552            * and wake the swap out task then wait else just go and wait.
    1553            *
    1554            * The check for an empty list is made so the swapper is only woken
    1555            * when if timers are changed.
     1547           * It is essential that we wait here without a special wait count and
     1548           * without the group in use.  Otherwise we could trigger a wait ping
     1549           * pong with another recycle waiter.  The state of the buffer is
     1550           * arbitrary afterwards.
    15561551           */
    1557           if (!rtems_chain_is_empty (&bdbuf_cache.modified))
    1558           {
    1559             rtems_chain_node* node = rtems_chain_first (&bdbuf_cache.modified);
    1560             uint32_t          write_blocks = 0;
    1561            
    1562             while ((write_blocks < bdbuf_config.max_write_blocks) &&
    1563                    !rtems_chain_is_tail (&bdbuf_cache.modified, node))
    1564             {
    1565               rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
    1566               bd->hold_timer = 0;
    1567               write_blocks++;
    1568               node = rtems_chain_next (node);
    1569             }
    1570 
    1571             rtems_bdbuf_wake_swapper ();
    1572           }
    1573          
    1574           /*
    1575            * Wait for a buffer to be returned to the cache. The buffer will be
    1576            * placed on the LRU list.
    1577            */
    1578           rtems_bdbuf_wait (&bdbuf_cache.buffer_waiters);
     1552          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
     1553          return false;
    15791554        }
    1580       }
    1581       else
    1582       {
    1583         /*
    1584          * We have a new buffer for this block.
    1585          */
    1586         if ((bd->state != RTEMS_BDBUF_STATE_EMPTY) &&
    1587             (bd->state != RTEMS_BDBUF_STATE_READ_AHEAD))
    1588           rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_3);
    1589 
    1590         if (bd->state == RTEMS_BDBUF_STATE_READ_AHEAD)
    1591         {
    1592           if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
    1593             rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_4);
    1594         }
    1595       }
    1596 
    1597       if (bd)
    1598       {
    1599         bd->dev       = device;
    1600         bd->block     = block;
    1601         bd->avl.left  = NULL;
    1602         bd->avl.right = NULL;
    1603         bd->state     = RTEMS_BDBUF_STATE_EMPTY;
    1604         bd->error     = 0;
    1605         bd->waiters   = 0;
    1606 
    1607         if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
    1608           rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_5);
    1609 
    1610         rtems_bdbuf_group_obtain (bd);
    1611 
    1612         return bd;
    1613       }
    1614     }
    1615     else
    1616     {
    1617       /*
    1618        * We have the buffer for the block from the cache. Check if the buffer
    1619        * in the cache is the same size and the requested size we are after.
    1620        */
     1555      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
     1556      case RTEMS_BDBUF_STATE_ACCESS:
     1557        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
     1558        break;
     1559      case RTEMS_BDBUF_STATE_TRANSFER:
     1560      case RTEMS_BDBUF_STATE_SYNC:
     1561        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
     1562        break;
     1563      default:
     1564        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_8);
     1565    }
     1566  }
     1567
     1568  return true;
     1569}
     1570
     /**
      * @brief Loops until the buffer is no longer in the SYNC or TRANSFER state.
      *
      * Returns as soon as the state is CACHED, MODIFIED, ACCESS or
      * ACCESS_MODIFIED.  While the state is SYNC or TRANSFER, rtems_bdbuf_wait()
      * is called with the transfer waiters and the state is examined again.  Any
      * other state results in rtems_bdbuf_fatal() with
      * RTEMS_BLKDEV_FATAL_BDBUF_STATE_9.
      *
      * @param bd The buffer descriptor to wait for.
      */
     1571static void
     1572rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
     1573{
     1574  while (true)
     1575  {
     1576    switch (bd->state)
     1577    {
     1578      case RTEMS_BDBUF_STATE_CACHED:
     1579      case RTEMS_BDBUF_STATE_MODIFIED:
     1580      case RTEMS_BDBUF_STATE_ACCESS:
     1581      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
     1582        return;
     1583      case RTEMS_BDBUF_STATE_SYNC:
     1584      case RTEMS_BDBUF_STATE_TRANSFER:
     1585        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
     1586        break;
     1587      default:
     1588        rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_9);
     1589    }
     1590  }
     1591}
     1592
     /**
      * @brief Waits on the cache buffer waiters for a buffer to become available.
      *
      * If the modified chain is not empty, rtems_bdbuf_wake_swapper() is called
      * first.  The wait itself is done with rtems_bdbuf_anonymous_wait() on
      * bdbuf_cache.buffer_waiters.
      */
     1593static void
     1594rtems_bdbuf_wait_for_buffer (void)
     1595{
     1596  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
     1597    rtems_bdbuf_wake_swapper ();
     1598 
     1599  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
     1600}
     1601
     /**
      * @brief Tries to get a buffer for a read ahead of the given block.
      *
      * The cache AVL tree is searched for the device/block key.  If no buffer is
      * found, one is requested from rtems_bdbuf_get_buffer_from_lru_list() and,
      * when that succeeds, rtems_bdbuf_group_obtain() is called for it.  If the
      * block is already in the tree, NULL is returned.
      *
      * @param dev The device of the block.
      * @param block The block number to look up.
      * @param bds_per_group The number of BDs per group passed on to the LRU list
      * lookup.
      *
      * @return The buffer obtained from the LRU list, or NULL if the block is
      * already in the cache or the LRU list lookup returned NULL.
      */
     1602static rtems_bdbuf_buffer *
     1603rtems_bdbuf_get_buffer_for_read_ahead (dev_t             dev,
     1604                                       rtems_blkdev_bnum block,
     1605                                       size_t            bds_per_group)
     1606{
     1607  rtems_bdbuf_buffer *bd = NULL;
     1608 
     1609  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dev, block);
     1610
     1611  if (bd == NULL)
     1612  {
     1613    bd = rtems_bdbuf_get_buffer_from_lru_list (dev, block, bds_per_group);
     1614
     1615    if (bd != NULL)
     1616      rtems_bdbuf_group_obtain (bd);
     1617  }
     1618  else
     1619    /*
     1620     * The buffer is in the cache.  So it is already available or in use, and
     1621     * thus no need for a read ahead.
     1622     */
     1623    bd = NULL;
     1624
     1625  return bd;
     1626}
     1627
     1628static rtems_bdbuf_buffer *
     1629rtems_bdbuf_get_buffer_for_access (dev_t             dev,
     1630                                   rtems_blkdev_bnum block,
     1631                                   size_t            bds_per_group)
     1632{
     1633  rtems_bdbuf_buffer *bd = NULL;
     1634 
     1635  do
     1636  {
     1637    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dev, block);
     1638
     1639    if (bd != NULL)
     1640    {
    16211641      if (bd->group->bds_per_group != bds_per_group)
    16221642      {
    1623         /*
    1624          * Remove the buffer from the AVL tree.
    1625          */
    1626         if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
    1627           rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_2);
    1628         bd->state = RTEMS_BDBUF_STATE_EMPTY;
    1629         rtems_chain_extract (&bd->link);
    1630         rtems_chain_prepend (&bdbuf_cache.ready, &bd->link);
     1643        if (rtems_bdbuf_wait_for_recycle (bd))
     1644        {
     1645          rtems_bdbuf_remove_from_cache_and_lru_list (bd);
     1646          rtems_bdbuf_make_empty_and_add_to_lru_list (bd);
     1647          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
     1648        }
    16311649        bd = NULL;
    16321650      }
    16331651    }
    1634   }
    1635   while (!bd);
    1636 
    1637   /*
    1638    * If the buffer is for read ahead and it exists in the AVL cache or is being
    1639    * accessed or being transfered then return NULL stopping further read ahead
    1640    * requests.
    1641    */
    1642   if (read_ahead)
    1643     return NULL;
    1644 
    1645   /*
    1646    * Before we wait for the buffer, we have to obtain its group.  This prevents
    1647    * a reallocation.
    1648    */
     1652    else
     1653    {
     1654      bd = rtems_bdbuf_get_buffer_from_lru_list (dev, block, bds_per_group);
     1655
     1656      if (bd == NULL)
     1657        rtems_bdbuf_wait_for_buffer ();
     1658    }
     1659  }
     1660  while (bd == NULL);
     1661
     1662  rtems_bdbuf_wait_for_access (bd);
    16491663  rtems_bdbuf_group_obtain (bd);
    16501664
    1651   /*
    1652    * Loop waiting for the buffer to enter the cached state. If the buffer is in
    1653    * the access or transfer state then wait until it is not.
    1654    */
    1655   available = false;
    1656   while (!available)
    1657   {
    1658     switch (bd->state)
    1659     {
    1660       case RTEMS_BDBUF_STATE_CACHED:
    1661       case RTEMS_BDBUF_STATE_MODIFIED:
    1662       case RTEMS_BDBUF_STATE_READ_AHEAD:
    1663         available = true;
    1664         break;
    1665 
    1666       case RTEMS_BDBUF_STATE_ACCESS:
    1667       case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
    1668         bd->waiters++;
    1669         rtems_bdbuf_wait (&bdbuf_cache.access_waiters);
    1670         bd->waiters--;
    1671         break;
    1672 
    1673       case RTEMS_BDBUF_STATE_SYNC:
    1674       case RTEMS_BDBUF_STATE_TRANSFER:
    1675         bd->waiters++;
    1676         rtems_bdbuf_wait (&bdbuf_cache.transfer_waiters);
    1677         bd->waiters--;
    1678         break;
    1679 
    1680       default:
    1681         rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_6);
    1682     }
    1683   }
    1684 
    1685   /*
    1686    * Buffer is linked to the LRU, modifed, or sync lists. Remove it from there.
    1687    */
    1688   rtems_chain_extract (&bd->link);
    1689 
    1690   /*
    1691    * The modified list is no longer a user of the buffers group.
    1692    */
    1693   if (bd->state == RTEMS_BDBUF_STATE_MODIFIED)
    1694     rtems_bdbuf_group_release (bd);
    1695 
    16961665  return bd;
    16971666}
    16981667
    1699 rtems_status_code
    1700 rtems_bdbuf_get (dev_t                device,
    1701                  rtems_blkdev_bnum    block,
    1702                  rtems_bdbuf_buffer** bdp)
    1703 {
    1704   rtems_disk_device*  dd;
    1705   rtems_bdbuf_buffer* bd;
    1706   rtems_blkdev_bnum   media_block;
    1707   size_t              bds_per_group;
     1668static rtems_status_code
     1669rtems_bdbuf_obtain_disk (dev_t               dev,
     1670                         rtems_blkdev_bnum   block,
     1671                         rtems_disk_device **dd_ptr,
     1672                         rtems_blkdev_bnum  *media_block_ptr,
     1673                         size_t             *bds_per_group_ptr)
     1674{
     1675  rtems_disk_device *dd = NULL;
    17081676
    17091677  if (!bdbuf_cache.initialised)
     
    17131681   * Do not hold the cache lock when obtaining the disk table.
    17141682   */
    1715   dd = rtems_disk_obtain (device);
    1716   if (!dd)
     1683  dd = rtems_disk_obtain (dev);
     1684  if (dd == NULL)
    17171685    return RTEMS_INVALID_ID;
    17181686
    1719   /*
    1720    * Compute the media block number. Drivers work with media block number not
    1721    * the block number a BD may have as this depends on the block size set by
    1722    * the user.
    1723    */
    1724   media_block = rtems_bdbuf_media_block (block,
    1725                                          dd->block_size,
    1726                                          dd->media_block_size);
    1727   if (media_block >= dd->size)
    1728   {
    1729     rtems_disk_release(dd);
    1730     return RTEMS_INVALID_NUMBER;
    1731   }
    1732 
    1733   bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);
    1734   if (!bds_per_group)
    1735   {
    1736     rtems_disk_release (dd);
    1737     return RTEMS_INVALID_NUMBER;
    1738   }
    1739 
    1740   media_block += dd->start;
     1687  *dd_ptr = dd;
     1688
     1689  if (media_block_ptr != NULL)
     1690  {
     1691    /*
     1692     * Compute the media block number. Drivers work with media block number not
     1693     * the block number a BD may have as this depends on the block size set by
     1694     * the user.
     1695     */
     1696    rtems_blkdev_bnum mb = rtems_bdbuf_media_block (block,
     1697                                                    dd->block_size,
     1698                                                    dd->media_block_size);
     1699    if (mb >= dd->size)
     1700    {
     1701      rtems_disk_release(dd);
     1702      return RTEMS_INVALID_NUMBER;
     1703    }
     1704
     1705    *media_block_ptr = mb + dd->start;
     1706  }
     1707
     1708  if (bds_per_group_ptr != NULL)
     1709  {
     1710    size_t bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);
     1711
     1712    if (bds_per_group == 0)
     1713    {
     1714      rtems_disk_release (dd);
     1715      return RTEMS_INVALID_NUMBER;
     1716    }
     1717
     1718    *bds_per_group_ptr = bds_per_group;
     1719  }
     1720
     1721  return RTEMS_SUCCESSFUL;
     1722}
     1723
     /**
      * @brief Releases a disk device and treats a failure as fatal.
      *
      * Calls rtems_disk_release() for the disk device.  Any status other than
      * RTEMS_SUCCESSFUL raises the fatal error RTEMS_BLKDEV_FATAL_BDBUF_DISK_REL.
      *
      * @param dd The disk device to release.
      */
     1724static void
     1725rtems_bdbuf_release_disk (rtems_disk_device *dd)
     1726{
     1727  rtems_status_code sc = RTEMS_SUCCESSFUL;
     1728
     1729  sc = rtems_disk_release (dd);
     1730  if (sc != RTEMS_SUCCESSFUL)
     1731    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_DISK_REL);
     1732}
     1733
     1734rtems_status_code
     1735rtems_bdbuf_get (dev_t                dev,
     1736                 rtems_blkdev_bnum    block,
     1737                 rtems_bdbuf_buffer **bd_ptr)
     1738{
     1739  rtems_status_code   sc = RTEMS_SUCCESSFUL;
     1740  rtems_disk_device  *dd = NULL;
     1741  rtems_bdbuf_buffer *bd = NULL;
     1742  rtems_blkdev_bnum   media_block = 0;
     1743  size_t              bds_per_group = 0;
     1744
     1745  sc = rtems_bdbuf_obtain_disk (dev, block, &dd, &media_block, &bds_per_group);
     1746  if (sc != RTEMS_SUCCESSFUL)
     1747    return sc;
    17411748
    17421749  rtems_bdbuf_lock_cache ();
     
    17471754  if (rtems_bdbuf_tracer)
    17481755    printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
    1749             media_block, block, (unsigned) device);
    1750 
    1751   bd = rtems_bdbuf_get_buffer (dd, bds_per_group, media_block, false);
    1752 
    1753   /*
    1754    * To get a modified buffer could be considered a bug in the caller because
    1755    * you should not be getting an already modified buffer but user may have
    1756    * modified a byte in a block then decided to seek the start and write the
    1757    * whole block and the file system will have no record of this so just gets
    1758    * the block to fill.
    1759    */
    1760   if (bd->state != RTEMS_BDBUF_STATE_MODIFIED)
    1761     bd->state = RTEMS_BDBUF_STATE_ACCESS;
    1762   else
    1763     bd->state = RTEMS_BDBUF_STATE_ACCESS_MODIFIED;
     1756            media_block, block, (unsigned) dev);
     1757
     1758  bd = rtems_bdbuf_get_buffer_for_access (dev, media_block, bds_per_group);
     1759
     1760  switch (bd->state)
     1761  {
     1762    case RTEMS_BDBUF_STATE_CACHED:
     1763    case RTEMS_BDBUF_STATE_FRESH:
     1764      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS);
     1765      break;
     1766    case RTEMS_BDBUF_STATE_MODIFIED:
     1767      /*
     1768       * To get a modified buffer could be considered a bug in the caller
     1769       * because you should not be getting an already modified buffer but user
     1770       * may have modified a byte in a block then decided to seek the start and
     1771       * write the whole block and the file system will have no record of this
     1772       * so just gets the block to fill.
     1773       */
     1774      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
     1775      break;
     1776    default:
     1777      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_2);
     1778      break;
     1779  }
    17641780 
    17651781  if (rtems_bdbuf_tracer)
     
    17711787  rtems_bdbuf_unlock_cache ();
    17721788
    1773   rtems_disk_release(dd);
    1774 
    1775   *bdp = bd;
     1789  rtems_bdbuf_release_disk (dd);
     1790
     1791  *bd_ptr = bd;
    17761792
    17771793  return RTEMS_SUCCESSFUL;
     
    17991815}
    18001816
    1801 rtems_status_code
    1802 rtems_bdbuf_read (dev_t                device,
    1803                   rtems_blkdev_bnum    block,
    1804                   rtems_bdbuf_buffer** bdp)
    1805 {
    1806   rtems_disk_device*    dd;
    1807   rtems_bdbuf_buffer*   bd = NULL;
    1808   uint32_t              read_ahead_count;
    1809   rtems_blkdev_request* req;
    1810   size_t                bds_per_group;
    1811   rtems_blkdev_bnum     media_block;
    1812   rtems_blkdev_bnum     media_block_count;
    1813   bool                  read_ahead = false;
    1814  
    1815   if (!bdbuf_cache.initialised)
    1816     return RTEMS_NOT_CONFIGURED;
    1817 
    1818   /*
    1819    * @todo This type of request structure is wrong and should be removed.
    1820    */
    1821 #define bdbuf_alloc(size) __builtin_alloca (size)
    1822 
    1823   req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
    1824                      (sizeof ( rtems_blkdev_sg_buffer) *
    1825                       rtems_bdbuf_configuration.max_read_ahead_blocks));
    1826 
    1827   /*
    1828    * Do not hold the cache lock when obtaining the disk table.
    1829    */
    1830   dd = rtems_disk_obtain (device);
    1831   if (!dd)
    1832     return RTEMS_INVALID_ID;
    1833  
    1834   /*
    1835    * Compute the media block number. Drivers work with media block number not
    1836    * the block number a BD may have as this depends on the block size set by
    1837    * the user.
    1838    */
    1839   media_block = rtems_bdbuf_media_block (block,
    1840                                          dd->block_size,
    1841                                          dd->media_block_size);
    1842   if (media_block >= dd->size)
    1843   {
    1844     rtems_disk_release(dd);
    1845     return RTEMS_INVALID_NUMBER;
    1846   }
    1847  
    1848   bds_per_group = rtems_bdbuf_bds_per_group (dd->block_size);
    1849   if (!bds_per_group)
    1850   {
    1851     rtems_disk_release (dd);
    1852     return RTEMS_INVALID_NUMBER;
    1853   }
    1854  
    1855   /*
    1856    * Print the block index relative to the physical disk and the user block
    1857    * number
    1858    */
    1859   if (rtems_bdbuf_tracer)
    1860     printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
    1861             media_block + dd->start, block, (unsigned) device);
    1862 
    1863   /*
    1864    * Read the block plus the required number of blocks ahead. The number of
    1865    * blocks to read ahead is configured by the user and limited by the size of
    1866    * the disk or reaching a read ahead block that is also cached.
    1867    *
    1868    * Limit the blocks read by the size of the disk.
    1869    */
    1870   if ((rtems_bdbuf_configuration.max_read_ahead_blocks + media_block) < dd->size)
    1871     read_ahead_count = rtems_bdbuf_configuration.max_read_ahead_blocks;
    1872   else
    1873     read_ahead_count = dd->size - media_block;
    1874 
    1875   media_block_count = dd->block_size / dd->media_block_size;
    1876  
     1817static void
     1818rtems_bdbuf_create_read_request (rtems_blkdev_request *req,
     1819                                 rtems_disk_device    *dd,
     1820                                 rtems_blkdev_bnum     media_block,
     1821                                 size_t                bds_per_group)
     1822{
     1823  rtems_bdbuf_buffer *bd = NULL;
     1824  rtems_blkdev_bnum   media_block_end = dd->start + dd->size;
     1825  rtems_blkdev_bnum   media_block_count = dd->block_size / dd->media_block_size;
     1826  dev_t               dev = dd->dev;
     1827  uint32_t            block_size = dd->block_size;
     1828  uint32_t            transfer_index = 1;
     1829  uint32_t            transfer_count = bdbuf_config.max_read_ahead_blocks + 1;
     1830
     1831  if (media_block_end - media_block < transfer_count)
     1832    transfer_count = media_block_end - media_block;
     1833
     1834  bd = rtems_bdbuf_get_buffer_for_access (dev, media_block, bds_per_group);
     1835
    18771836  req->bufnum = 0;
    18781837
    1879   rtems_bdbuf_lock_cache ();
    1880 
    1881   while (req->bufnum < read_ahead_count)
    1882   {
    1883     /*
    1884      * Get the buffer for the requested block. If the block is cached then
    1885      * return it. If it is not cached transfer the block from the disk media
    1886      * into memory.
    1887      *
    1888      * We need to clean up any buffers allocated and not passed back to the
    1889      * caller.
    1890      */
    1891     bd = rtems_bdbuf_get_buffer (dd, bds_per_group, media_block + dd->start,
    1892                                  read_ahead);
    1893 
    1894     /*
    1895      * Read ahead buffer is in the cache or none available. Read what we
    1896      * can.
    1897      */
    1898     if (!bd)
     1838  req->bufs [0].user   = bd;
     1839  req->bufs [0].block  = media_block;
     1840  req->bufs [0].length = block_size;
     1841  req->bufs [0].buffer = bd->buffer;
     1842
     1843  switch (bd->state)
     1844  {
     1845    case RTEMS_BDBUF_STATE_CACHED:
     1846    case RTEMS_BDBUF_STATE_MODIFIED:
     1847      return;
     1848    case RTEMS_BDBUF_STATE_FRESH:
    18991849      break;
    1900 
    1901     /*
    1902      * Is the block we are interested in the cache ?
    1903      */
    1904     if ((bd->state == RTEMS_BDBUF_STATE_CACHED) ||
    1905         (bd->state == RTEMS_BDBUF_STATE_MODIFIED))
    1906     {
    1907       if (read_ahead)
    1908         rtems_bdbuf_group_release (bd);
    1909 
     1850    default:
     1851      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_1);
    19101852      break;
    1911     }
    1912 
    1913     bd->state = RTEMS_BDBUF_STATE_TRANSFER;
    1914     bd->error = 0;
     1853  }
     1854
     1855  while (transfer_index < transfer_count)
     1856  {
     1857    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
    19151858
    19161859    if (rtems_bdbuf_tracer)
    19171860      rtems_bdbuf_show_users ("reading", bd);
     1861
     1862    media_block += media_block_count;
     1863
     1864    bd = rtems_bdbuf_get_buffer_for_read_ahead (dev, media_block,
     1865                                                bds_per_group);
     1866
     1867    if (bd == NULL)
     1868      break;
    19181869   
    1919     /*
    1920      * @todo The use of these req blocks is not a great design. The req is a
    1921      *       struct with a single 'bufs' declared in the req struct and the
    1922      *       others are added in the outer level struct. This relies on the
    1923      *       structs joining as a single array and that assumes the compiler
    1924      *       packs the structs. Why not just place on a list ? The BD has a
    1925      *       node that can be used.
    1926      */
    1927     req->bufs[req->bufnum].user   = bd;
    1928     req->bufs[req->bufnum].block  = media_block + dd->start;
    1929     req->bufs[req->bufnum].length = dd->block_size;
    1930     req->bufs[req->bufnum].buffer = bd->buffer;
    1931     req->bufnum++;
    1932 
    1933     /*
    1934      * Move the media block count by the number of media blocks in the
    1935      * disk device's set block size.
    1936      */
    1937     media_block += media_block_count;
    1938 
    1939     /*
    1940      * After the first buffer we have read ahead buffers.
    1941      */
    1942     read_ahead = true;
    1943   }
    1944 
    1945   /*
    1946    * Transfer any requested buffers. If the request count is 0 we have found
    1947    * the block in the cache so return it.
    1948    */
     1870    req->bufs [transfer_index].user   = bd;
     1871    req->bufs [transfer_index].block  = media_block;
     1872    req->bufs [transfer_index].length = block_size;
     1873    req->bufs [transfer_index].buffer = bd->buffer;
     1874
     1875    ++transfer_index;
     1876  }
     1877
     1878  req->bufnum = transfer_index;
     1879}
     1880
     1881static rtems_bdbuf_buffer *
     1882rtems_bdbuf_execute_read_request (rtems_blkdev_request *req,
     1883                                  rtems_disk_device    *dd)
     1884{
    19491885  if (req->bufnum)
    19501886  {
     
    19601896     * of the bdbuf code.
    19611897     */
    1962     rtems_event_set out;
    1963     int             result;
    1964     uint32_t        b;
    1965     bool            wake_transfer;
    1966 
    1967     /*
    1968      * Flush any events.
    1969      */
    1970     rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
    1971                          RTEMS_EVENT_ALL | RTEMS_NO_WAIT,
    1972                          0, &out);
     1898    int      result = 0;
     1899    int      error = 0;
     1900    uint32_t transfer_index = 0;
     1901    bool     wake_transfer = false;
     1902    bool     wake_buffer = false;
    19731903                         
    19741904    rtems_bdbuf_unlock_cache ();
     
    19831913    result = dd->ioctl (dd, RTEMS_BLKIO_REQUEST, req);
    19841914
    1985     /*
    1986      * Inspection of the DOS FS code shows the result from this function is
    1987      * handled and a buffer must be returned.
    1988      */
    1989     if (result < 0)
    1990     {
    1991       req->error = errno;
    1992       req->status = RTEMS_IO_ERROR;
     1915    if (result == 0)
     1916    {
     1917      rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
     1918      error = req->error;
    19931919    }
    19941920    else
    1995     {
    1996       rtems_status_code sc;
    1997      
    1998       sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
    1999                                 RTEMS_EVENT_ALL | RTEMS_WAIT,
    2000                                 0, &out);
    2001 
    2002       if (sc != RTEMS_SUCCESSFUL)
    2003         rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
    2004     }
    2005 
    2006     wake_transfer = false;
     1921      error = errno;
    20071922   
    20081923    rtems_bdbuf_lock_cache ();
    20091924
    2010     for (b = 1; b < req->bufnum; b++)
    2011     {
    2012       bd = req->bufs[b].user;
    2013       if (!bd->error)
    2014         bd->error = req->error;
    2015       bd->state = RTEMS_BDBUF_STATE_READ_AHEAD;
    2016 
    2017       rtems_bdbuf_group_release (bd);
     1925    for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
     1926    {
     1927      rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
     1928      bool waiters = bd->waiters;
     1929
     1930      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
     1931
     1932      if (waiters)
     1933        wake_transfer = true;
     1934
     1935      bd->error = error;
    20181936
    20191937      if (rtems_bdbuf_tracer)
    20201938        rtems_bdbuf_show_users ("read-ahead", bd);
    20211939
    2022       rtems_chain_prepend (&bdbuf_cache.ready, &bd->link);
    2023 
    2024       /*
    2025        * If there is an error remove the BD from the AVL tree as it is invalid,
    2026        * then wake any threads that may be waiting. A thread may have been
    2027        * waiting for this block and assumed it was in the tree.
    2028        */
    2029       if (bd->error)
     1940      if (transfer_index > 0)
    20301941      {
    2031         bd->state = RTEMS_BDBUF_STATE_EMPTY;
    2032         if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
    2033           rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_9);
     1942        /*
     1943         * This is a read ahead buffer.
     1944         */
     1945
     1946        rtems_bdbuf_group_release (bd);
     1947
     1948        if (!waiters)
     1949          wake_buffer = true;
     1950
     1951        if (error == 0 || waiters)
     1952          rtems_chain_append (&bdbuf_cache.lru, &bd->link);
     1953        else
     1954        {
     1955          rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
     1956          rtems_chain_prepend (&bdbuf_cache.lru, &bd->link);
     1957          if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
     1958            rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CACHE_RM);
     1959        }
    20341960      }
    2035 
    2036       if (bd->waiters)
    2037         wake_transfer = true;
    20381961    }
    20391962
    20401963    if (wake_transfer)
    20411964      rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
    2042     else
     1965
     1966    if (wake_buffer)
    20431967      rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
    2044    
    2045     bd = req->bufs[0].user;
    2046 
    2047     if (rtems_bdbuf_tracer)
    2048       rtems_bdbuf_show_users ("read-done", bd);
    2049   }
    2050 
    2051   if (bd->state != RTEMS_BDBUF_STATE_MODIFIED)
    2052     bd->state = RTEMS_BDBUF_STATE_ACCESS;
    2053   else
    2054     bd->state = RTEMS_BDBUF_STATE_ACCESS_MODIFIED;
     1968  }
     1969
     1970  return req->bufs [0].user;
     1971}
     1972
     1973rtems_status_code
     1974rtems_bdbuf_read (dev_t                dev,
     1975                  rtems_blkdev_bnum    block,
     1976                  rtems_bdbuf_buffer **bd_ptr)
     1977{
     1978  rtems_status_code     sc = RTEMS_SUCCESSFUL;
     1979  rtems_disk_device    *dd = NULL;
     1980  rtems_bdbuf_buffer   *bd = NULL;
     1981  rtems_blkdev_request *req = NULL;
     1982  rtems_blkdev_bnum     media_block = 0;
     1983  size_t                bds_per_group = 0;
     1984
     1985  sc = rtems_bdbuf_obtain_disk (dev, block, &dd, &media_block, &bds_per_group);
     1986  if (sc != RTEMS_SUCCESSFUL)
     1987    return sc;
     1988
     1989  /*
     1990   * TODO: This type of request structure is wrong and should be removed.
     1991   */
     1992#define bdbuf_alloc(size) __builtin_alloca (size)
     1993
     1994  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
     1995                     sizeof ( rtems_blkdev_sg_buffer) *
     1996                      (bdbuf_config.max_read_ahead_blocks + 1));
     1997 
     1998  if (rtems_bdbuf_tracer)
     1999    printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
     2000            media_block + dd->start, block, (unsigned) dev);
     2001
     2002  rtems_bdbuf_lock_cache ();
     2003  rtems_bdbuf_create_read_request (req, dd, media_block, bds_per_group);
     2004
     2005  bd = rtems_bdbuf_execute_read_request (req, dd);
     2006
     2007  switch (bd->state)
     2008  {
     2009    case RTEMS_BDBUF_STATE_CACHED:
     2010      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS);
     2011      break;
     2012    case RTEMS_BDBUF_STATE_MODIFIED:
     2013      rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
     2014      break;
     2015    default:
     2016      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_4);
     2017      break;
     2018  }
    20552019
    20562020  if (rtems_bdbuf_tracer)
     
    20612025 
    20622026  rtems_bdbuf_unlock_cache ();
    2063   rtems_disk_release (dd);
    2064 
    2065   *bdp = bd;
     2027  rtems_bdbuf_release_disk (dd);
     2028
     2029  *bd_ptr = bd;
    20662030
    20672031  return RTEMS_SUCCESSFUL;
    20682032}
    20692033
    2070 rtems_status_code
    2071 rtems_bdbuf_release (rtems_bdbuf_buffer* bd)
     2034static rtems_status_code
     2035rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
    20722036{
    20732037  if (!bdbuf_cache.initialised)
    20742038    return RTEMS_NOT_CONFIGURED;
    2075 
    20762039  if (bd == NULL)
    20772040    return RTEMS_INVALID_ADDRESS;
    2078 
    2079   rtems_bdbuf_lock_cache ();
    2080 
    20812041  if (rtems_bdbuf_tracer)
    2082     printf ("bdbuf:release: %" PRIu32 "\n", bd->block);
    2083  
    2084   if (bd->state == RTEMS_BDBUF_STATE_ACCESS_MODIFIED)
    2085   {
    2086     rtems_bdbuf_append_modified (bd);
    2087   }
    2088   else
    2089   {
    2090     bd->state = RTEMS_BDBUF_STATE_CACHED;
    2091     rtems_chain_append (&bdbuf_cache.lru, &bd->link);
    2092 
    2093     rtems_bdbuf_group_release (bd);
    2094   }
    2095  
    2096   if (rtems_bdbuf_tracer)
    2097     rtems_bdbuf_show_users ("release", bd);
    2098  
    2099   /*
    2100    * If there are threads waiting to access the buffer wake them. Wake any
    2101    * waiters if this buffer is placed back onto the LRU queue.
    2102    */
    2103   if (bd->waiters)
    2104     rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
    2105   else
    2106     rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
     2042  {
     2043    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
     2044    rtems_bdbuf_show_users (kind, bd);
     2045  }
     2046  rtems_bdbuf_lock_cache();
     2047
     2048  return RTEMS_SUCCESSFUL;
     2049}
     2050
     2051rtems_status_code
     2052rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
     2053{
     2054  rtems_status_code sc = RTEMS_SUCCESSFUL;
     2055
     2056  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
     2057  if (sc != RTEMS_SUCCESSFUL)
     2058    return sc;
     2059
     2060  switch (bd->state)
     2061  {
     2062    case RTEMS_BDBUF_STATE_ACCESS:
     2063      rtems_bdbuf_add_to_lru_list_after_access (bd);
     2064      break;
     2065    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
     2066      rtems_bdbuf_add_to_modified_list_after_access (bd);
     2067      break;
     2068    default:
     2069      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_0);
     2070      break;
     2071  }
    21072072 
    21082073  if (rtems_bdbuf_tracer)
     
    21152080
    21162081rtems_status_code
    2117 rtems_bdbuf_release_modified (rtems_bdbuf_buffer* bd)
    2118 {
    2119   if (!bdbuf_cache.initialised)
    2120     return RTEMS_NOT_CONFIGURED;
    2121 
    2122   if (!bd)
    2123     return RTEMS_INVALID_ADDRESS;
    2124 
    2125   rtems_bdbuf_lock_cache ();
    2126 
    2127   if (rtems_bdbuf_tracer)
    2128     printf ("bdbuf:release modified: %" PRIu32 "\n", bd->block);
    2129 
    2130   bd->hold_timer = rtems_bdbuf_configuration.swap_block_hold;
    2131  
    2132   if (rtems_bdbuf_tracer)
    2133     rtems_bdbuf_show_users ("release-modified", bd);
    2134  
    2135   rtems_bdbuf_append_modified (bd);
    2136 
    2137   if (bd->waiters)
    2138     rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
     2082rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
     2083{
     2084  rtems_status_code sc = RTEMS_SUCCESSFUL;
     2085
     2086  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
     2087  if (sc != RTEMS_SUCCESSFUL)
     2088    return sc;
     2089
     2090  switch (bd->state)
     2091  {
     2092    case RTEMS_BDBUF_STATE_ACCESS:
     2093    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
     2094      rtems_bdbuf_add_to_modified_list_after_access (bd);
     2095      break;
     2096    default:
     2097      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_6);
     2098      break;
     2099  }
    21392100 
    21402101  if (rtems_bdbuf_tracer)
     
    21472108
    21482109rtems_status_code
    2149 rtems_bdbuf_sync (rtems_bdbuf_buffer* bd)
    2150 {
    2151   bool available;
    2152 
     2110rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
     2111{
     2112  rtems_status_code sc = RTEMS_SUCCESSFUL;
     2113
     2114  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
     2115  if (sc != RTEMS_SUCCESSFUL)
     2116    return sc;
     2117
     2118  switch (bd->state)
     2119  {
     2120    case RTEMS_BDBUF_STATE_ACCESS:
     2121    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
     2122      rtems_bdbuf_add_to_sync_list_after_access (bd);
     2123      break;
     2124    default:
     2125      rtems_bdbuf_fatal (bd->state, RTEMS_BLKDEV_FATAL_BDBUF_STATE_5);
     2126      break;
     2127  }
     2128 
    21532129  if (rtems_bdbuf_tracer)
    2154     printf ("bdbuf:sync: %" PRIu32 "\n", bd->block);
    2155  
    2156   if (!bdbuf_cache.initialised)
    2157     return RTEMS_NOT_CONFIGURED;
    2158 
    2159   if (!bd)
    2160     return RTEMS_INVALID_ADDRESS;
    2161 
    2162   rtems_bdbuf_lock_cache ();
    2163 
    2164   bd->state = RTEMS_BDBUF_STATE_SYNC;
    2165 
    2166   rtems_chain_append (&bdbuf_cache.sync, &bd->link);
    2167 
    2168   if (bd->waiters)
    2169     rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
     2130    rtems_bdbuf_show_usage ();
    21702131
    21712132  rtems_bdbuf_wake_swapper ();
    2172 
    2173   available = false;
    2174   while (!available)
    2175   {
    2176     switch (bd->state)
    2177     {
    2178       case RTEMS_BDBUF_STATE_CACHED:
    2179       case RTEMS_BDBUF_STATE_READ_AHEAD:
    2180       case RTEMS_BDBUF_STATE_MODIFIED:
    2181       case RTEMS_BDBUF_STATE_ACCESS:
    2182       case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
    2183         available = true;
    2184         break;
    2185 
    2186       case RTEMS_BDBUF_STATE_SYNC:
    2187       case RTEMS_BDBUF_STATE_TRANSFER:
    2188         bd->waiters++;
    2189         rtems_bdbuf_wait (&bdbuf_cache.transfer_waiters);
    2190         bd->waiters--;
    2191         break;
    2192 
    2193       default:
    2194         rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY_7);
    2195     }
    2196   }
     2133  rtems_bdbuf_wait_for_sync_done (bd);
     2134
     2135  /*
     2136   * If no one intercepts the sync, we created a cached buffer which may be
     2137   * recycled.
     2138   */
     2139  if (bd->state == RTEMS_BDBUF_STATE_CACHED && bd->waiters == 0)
     2140    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
    21972141
    21982142  rtems_bdbuf_unlock_cache ();
     
    22042148rtems_bdbuf_syncdev (dev_t dev)
    22052149{
    2206   rtems_disk_device*  dd;
    2207   rtems_status_code   sc;
    2208   rtems_event_set     out;
     2150  rtems_status_code  sc = RTEMS_SUCCESSFUL;
     2151  rtems_disk_device *dd = NULL;
    22092152
    22102153  if (rtems_bdbuf_tracer)
    22112154    printf ("bdbuf:syncdev: %08x\n", (unsigned) dev);
    22122155
    2213   if (!bdbuf_cache.initialised)
    2214     return RTEMS_NOT_CONFIGURED;
    2215 
    2216   /*
    2217    * Do not hold the cache lock when obtaining the disk table.
    2218    */
    2219   dd = rtems_disk_obtain (dev);
    2220   if (!dd)
    2221     return RTEMS_INVALID_ID;
     2156  sc = rtems_bdbuf_obtain_disk (dev, 0, &dd, NULL, NULL);
     2157  if (sc != RTEMS_SUCCESSFUL)
     2158    return sc;
    22222159
    22232160  /*
     
    22272164   * sync lock can only be obtained with the cache unlocked.
    22282165   */
    2229  
    22302166  rtems_bdbuf_lock_sync ();
    22312167  rtems_bdbuf_lock_cache (); 
     
    22442180  rtems_bdbuf_wake_swapper ();
    22452181  rtems_bdbuf_unlock_cache ();
    2246  
    2247   sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
    2248                             RTEMS_EVENT_ALL | RTEMS_WAIT,
    2249                             0, &out);
    2250 
    2251   if (sc != RTEMS_SUCCESSFUL)
    2252     rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
    2253      
     2182  rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
    22542183  rtems_bdbuf_unlock_sync ();
    2255  
    2256   return rtems_disk_release (dd);
     2184  rtems_bdbuf_release_disk (dd);
     2185
     2186  return RTEMS_SUCCESSFUL;
    22572187}
    22582188
     
    23752305
    23762306        if (rtems_chain_is_empty (&transfer->bds) ||
    2377             (transfer->write_req->bufnum >= rtems_bdbuf_configuration.max_write_blocks))
     2307            (transfer->write_req->bufnum >= bdbuf_config.max_write_blocks))
    23782308          write = true;
    23792309
     
    23992329            {
    24002330              bd = transfer->write_req->bufs[b].user;
    2401               bd->state  = RTEMS_BDBUF_STATE_MODIFIED;
     2331              rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
    24022332              bd->error = errno;
    24032333
     
    24132343          else
    24142344          {
    2415             rtems_status_code sc = 0;
    2416             rtems_event_set   out;
    2417 
    2418             sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
    2419                                       RTEMS_EVENT_ALL | RTEMS_WAIT,
    2420                                       0, &out);
    2421 
    2422             if (sc != RTEMS_SUCCESSFUL)
    2423               rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
     2345            rtems_bdbuf_wait_for_event (RTEMS_BDBUF_TRANSFER_SYNC);
    24242346
    24252347            rtems_bdbuf_lock_cache ();
     
    24282350            {
    24292351              bd = transfer->write_req->bufs[b].user;
    2430               bd->state = RTEMS_BDBUF_STATE_CACHED;
     2352              rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
    24312353              bd->error = 0;
    24322354
     
    25012423      /*
    25022424       * Check if the buffer's hold timer has reached 0. If a sync is active
    2503        * force all the timers to 0.
     2425       * or someone waits for a buffer, force all the timers to 0.
    25042426       *
    25052427       * @note Lots of sync requests will skew this timer. It should be based
    25062428       *       on TOD to be accurate. Does it matter?
    25072429       */
    2508       if (sync_active)
     2430      if (sync_active || rtems_bdbuf_has_buffer_waiters ())
    25092431        bd->hold_timer = 0;
    25102432 
     
    25462468         */
    25472469
    2548         bd->state = RTEMS_BDBUF_STATE_TRANSFER;
     2470        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
    25492471
    25502472        rtems_chain_extract (node);
     
    27122634  rtems_blkdev_request* write_req =
    27132635    malloc (sizeof (rtems_blkdev_request) +
    2714             (rtems_bdbuf_configuration.max_write_blocks *
    2715              sizeof (rtems_blkdev_sg_buffer)));
     2636            (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer)));
    27162637
    27172638  if (!write_req)
     
    27392660  while (worker->enabled)
    27402661  {
    2741     rtems_event_set   out;
    2742     rtems_status_code sc;
    2743    
    2744     sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
    2745                               RTEMS_EVENT_ALL | RTEMS_WAIT,
    2746                               RTEMS_NO_TIMEOUT,
    2747                               &out);
    2748 
    2749     if (sc != RTEMS_SUCCESSFUL)
    2750       rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
     2662    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
    27512663
    27522664    rtems_bdbuf_swapout_write (&worker->transfer);
     
    27792691  rtems_bdbuf_lock_cache ();
    27802692 
    2781   for (w = 0; w < rtems_bdbuf_configuration.swapout_workers; w++)
     2693  for (w = 0; w < bdbuf_config.swapout_workers; w++)
    27822694  {
    27832695    rtems_bdbuf_swapout_worker* worker;
     
    27952707
    27962708    sc = rtems_task_create (rtems_build_name('B', 'D', 'o', 'a' + w),
    2797                             (rtems_bdbuf_configuration.swapout_priority ?
    2798                              rtems_bdbuf_configuration.swapout_priority :
     2709                            (bdbuf_config.swapout_priority ?
     2710                             bdbuf_config.swapout_priority :
    27992711                             RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
    28002712                            SWAPOUT_TASK_STACK_SIZE,
Note: See TracChangeset for help on using the changeset viewer.