source: rtems/cpukit/libblock/src/bdbuf.c @ 3bb9c61c

4.115
Last change on this file since 3bb9c61c was 3bb9c61c, checked in by Sebastian Huber <sebastian.huber@…>, on 11/20/14 at 08:03:50

bdbuf: Use rtems_cache_get_data_line_size()

  • Property mode set to 100644
File size: 85.5 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (c) 2009-2012 embedded brains GmbH.
23 */
24
25/**
26 * Set to 1 to enable debug tracing.
27 */
28#define RTEMS_BDBUF_TRACE 0
29
30#if HAVE_CONFIG_H
31#include "config.h"
32#endif
33#include <limits.h>
34#include <errno.h>
35#include <stdio.h>
36#include <string.h>
37#include <inttypes.h>
38#include <pthread.h>
39
40#include <rtems.h>
41#include <rtems/error.h>
42#include <rtems/malloc.h>
43
44#include "rtems/bdbuf.h"
45
46#define BDBUF_INVALID_DEV NULL
47
48/*
49 * Simpler label for this file.
50 */
51#define bdbuf_config rtems_bdbuf_configuration
52
53/**
54 * A swapout transfer transaction data. This data is passed to a worked thread
55 * to handle the write phase of the transfer.
56 */
57typedef struct rtems_bdbuf_swapout_transfer
58{
59  rtems_chain_control   bds;         /**< The transfer list of BDs. */
60  rtems_disk_device    *dd;          /**< The device the transfer is for. */
61  bool                  syncing;     /**< The data is a sync'ing. */
62  rtems_blkdev_request  write_req;   /**< The write request. */
63} rtems_bdbuf_swapout_transfer;
64
65/**
66 * Swapout worker thread. These are available to take processing from the
67 * main swapout thread and handle the I/O operation.
68 */
69typedef struct rtems_bdbuf_swapout_worker
70{
71  rtems_chain_node             link;     /**< The threads sit on a chain when
72                                          * idle. */
73  rtems_id                     id;       /**< The id of the task so we can wake
74                                          * it. */
75  bool                         enabled;  /**< The worker is enabled. */
76  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
77                                          * thread. */
78} rtems_bdbuf_swapout_worker;
79
80#if defined(RTEMS_BDBUF_USE_PTHREAD)
81typedef pthread_mutex_t rtems_bdbuf_lock_type;
82#else
83typedef rtems_id rtems_bdbuf_lock_type;
84#endif
85
86/**
87 * Buffer waiters synchronization.
88 */
89typedef struct rtems_bdbuf_waiters {
90  unsigned       count;
91#if defined(RTEMS_BDBUF_USE_PTHREAD)
92  pthread_cond_t cond_var;
93#else
94  rtems_id       sema;
95#endif
96} rtems_bdbuf_waiters;
97
98/**
99 * The BD buffer cache.
100 */
101typedef struct rtems_bdbuf_cache
102{
103  rtems_id            swapout;           /**< Swapout task ID */
104  bool                swapout_enabled;   /**< Swapout is only running if
105                                          * enabled. Set to false to kill the
106                                          * swap out task. It deletes itself. */
107  rtems_chain_control swapout_free_workers; /**< The work threads for the swapout
108                                             * task. */
109
110  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
111                                          * descriptors. */
112  void*               buffers;           /**< The buffer's memory. */
113  size_t              buffer_min_count;  /**< Number of minimum size buffers
114                                          * that fit the buffer memory. */
115  size_t              max_bds_per_group; /**< The number of BDs of minimum
116                                          * buffer size that fit in a group. */
117  uint32_t            flags;             /**< Configuration flags. */
118
119  rtems_bdbuf_lock_type lock;            /**< The cache lock. It locks all
120                                          * cache data, BD and lists. */
121  rtems_bdbuf_lock_type sync_lock;       /**< Sync calls block writes. */
122  bool                sync_active;       /**< True if a sync is active. */
123  rtems_id            sync_requester;    /**< The sync requester. */
124  rtems_disk_device  *sync_device;       /**< The device to sync and
125                                          * BDBUF_INVALID_DEV not a device
126                                          * sync. */
127
128  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
129                                          * root. There is only one. */
130  rtems_chain_control lru;               /**< Least recently used list */
131  rtems_chain_control modified;          /**< Modified buffers list */
132  rtems_chain_control sync;              /**< Buffers to sync list */
133
134  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
135                                          * ACCESS_CACHED, ACCESS_MODIFIED or
136                                          * ACCESS_EMPTY
137                                          * state. */
138  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
139                                          * state. */
140  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
141                                          * available. */
142
143  rtems_bdbuf_swapout_transfer *swapout_transfer;
144  rtems_bdbuf_swapout_worker *swapout_workers;
145
146  size_t              group_count;       /**< The number of groups. */
147  rtems_bdbuf_group*  groups;            /**< The groups. */
148  rtems_id            read_ahead_task;   /**< Read-ahead task */
149  rtems_chain_control read_ahead_chain;  /**< Read-ahead request chain */
150  bool                read_ahead_enabled; /**< Read-ahead enabled */
151  rtems_status_code   init_status;       /**< The initialization status */
152} rtems_bdbuf_cache;
153
154typedef enum {
155  RTEMS_BDBUF_FATAL_CACHE_LOCK,
156  RTEMS_BDBUF_FATAL_CACHE_UNLOCK,
157  RTEMS_BDBUF_FATAL_CACHE_WAIT_2,
158  RTEMS_BDBUF_FATAL_CACHE_WAIT_TO,
159  RTEMS_BDBUF_FATAL_CACHE_WAKE,
160  RTEMS_BDBUF_FATAL_PREEMPT_DIS,
161  RTEMS_BDBUF_FATAL_PREEMPT_RST,
162  RTEMS_BDBUF_FATAL_RA_WAKE_UP,
163  RTEMS_BDBUF_FATAL_RECYCLE,
164  RTEMS_BDBUF_FATAL_SO_WAKE_1,
165  RTEMS_BDBUF_FATAL_SO_WAKE_2,
166  RTEMS_BDBUF_FATAL_STATE_0,
167  RTEMS_BDBUF_FATAL_STATE_2,
168  RTEMS_BDBUF_FATAL_STATE_4,
169  RTEMS_BDBUF_FATAL_STATE_5,
170  RTEMS_BDBUF_FATAL_STATE_6,
171  RTEMS_BDBUF_FATAL_STATE_7,
172  RTEMS_BDBUF_FATAL_STATE_8,
173  RTEMS_BDBUF_FATAL_STATE_9,
174  RTEMS_BDBUF_FATAL_STATE_10,
175  RTEMS_BDBUF_FATAL_STATE_11,
176  RTEMS_BDBUF_FATAL_SWAPOUT_RE,
177  RTEMS_BDBUF_FATAL_SYNC_LOCK,
178  RTEMS_BDBUF_FATAL_SYNC_UNLOCK,
179  RTEMS_BDBUF_FATAL_TREE_RM,
180  RTEMS_BDBUF_FATAL_WAIT_EVNT,
181  RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT,
182  RTEMS_BDBUF_FATAL_ONCE,
183  RTEMS_BDBUF_FATAL_MTX_ATTR_INIT,
184  RTEMS_BDBUF_FATAL_MTX_ATTR_SETPROTO,
185  RTEMS_BDBUF_FATAL_CV_WAIT,
186  RTEMS_BDBUF_FATAL_CV_BROADCAST
187} rtems_bdbuf_fatal_code;
188
189/**
190 * The events used in this code. These should be system events rather than
191 * application events.
192 */
193#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
194#define RTEMS_BDBUF_READ_AHEAD_WAKE_UP RTEMS_EVENT_1
195
196/**
197 * Lock semaphore attributes. This is used for locking type mutexes.
198 *
199 * @warning Priority inheritance is on.
200 */
201#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
202  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
203   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
204
205/**
206 * Waiter semaphore attributes.
207 *
208 * @warning Do not configure as inherit priority. If a driver is in the driver
209 *          initialisation table this locked semaphore will have the IDLE task
210 *          as the holder and a blocking task will raise the priority of the
 *          IDLE task which can cause unusual side effects.
212 */
213#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
214  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
215   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
216
217/**
218 * Waiter timeout. Set to non-zero to find some info on a waiter that is
219 * waiting too long.
220 */
221#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
222#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
223#define RTEMS_BDBUF_WAIT_TIMEOUT \
224  (RTEMS_MICROSECONDS_TO_TICKS (20000000))
225#endif
226
227static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
228
229static rtems_task rtems_bdbuf_read_ahead_task(rtems_task_argument arg);
230
231/**
232 * The Buffer Descriptor cache.
233 */
234static rtems_bdbuf_cache bdbuf_cache;
235
236static pthread_once_t rtems_bdbuf_once_state = PTHREAD_ONCE_INIT;
237
238#if RTEMS_BDBUF_TRACE
239/**
240 * If true output the trace message.
241 */
242bool rtems_bdbuf_tracer;
243
244/**
245 * Return the number of items on the list.
246 *
247 * @param list The chain control.
248 * @return uint32_t The number of items on the list.
249 */
250uint32_t
251rtems_bdbuf_list_count (rtems_chain_control* list)
252{
253  rtems_chain_node* node = rtems_chain_first (list);
254  uint32_t          count = 0;
255  while (!rtems_chain_is_tail (list, node))
256  {
257    count++;
258    node = rtems_chain_next (node);
259  }
260  return count;
261}
262
263/**
264 * Show the usage for the bdbuf cache.
265 */
266void
267rtems_bdbuf_show_usage (void)
268{
269  uint32_t group;
270  uint32_t total = 0;
271  uint32_t val;
272
273  for (group = 0; group < bdbuf_cache.group_count; group++)
274    total += bdbuf_cache.groups[group].users;
275  printf ("bdbuf:group users=%lu", total);
276  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
277  printf (", lru=%lu", val);
278  total = val;
279  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
280  printf (", mod=%lu", val);
281  total += val;
282  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
283  printf (", sync=%lu", val);
284  total += val;
285  printf (", total=%lu\n", total);
286}
287
/**
 * Show the users for a group of a bd.
 *
 * @param where A label to show the context of output.
 * @param bd The bd to show the users of.
 */
void
rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
{
  /* Two-letter tags indexed by the buffer state value. */
  const char* states[] =
    { "FR", "EM", "CH", "AC", "AM", "AE", "AP", "MD", "SY", "TR", "TP" };

  /*
   * The group and BD indices are pointer differences into the cache's
   * groups and bds tables, hence the %td (ptrdiff_t) conversions.
   */
  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
          where,
          bd->block, states[bd->state],
          bd->group - bdbuf_cache.groups,
          bd - bdbuf_cache.bds,
          bd->group->users,
          bd->group->users > 8 ? "<<<<<<<" : "");
}
308#else
309#define rtems_bdbuf_tracer (0)
310#define rtems_bdbuf_show_usage() ((void) 0)
311#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
312#endif
313
314/**
315 * The default maximum height of 32 allows for AVL trees having between
316 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
317 * change this compile-time constant as you wish.
318 */
319#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
320#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
321#endif
322
323static void
324rtems_bdbuf_fatal (rtems_fatal_code error)
325{
326  rtems_fatal (RTEMS_FATAL_SOURCE_BDBUF, error);
327}
328
329static void
330rtems_bdbuf_fatal_with_state (rtems_bdbuf_buf_state state,
331                              rtems_bdbuf_fatal_code error)
332{
333  rtems_bdbuf_fatal ((((uint32_t) state) << 16) | error);
334}
335
/**
 * Create a cache lock.  Depending on the build either a priority
 * inheritance POSIX mutex or a Classic API binary semaphore is used.
 *
 * @param name The semaphore name (unused for the pthread variant).
 * @param lock The lock to initialise.
 * @retval RTEMS_SUCCESSFUL The lock was created.
 * @retval RTEMS_UNSATISFIED The pthread mutex could not be initialised.
 * @return Classic variant: the rtems_semaphore_create() status.
 */
static rtems_status_code
rtems_bdbuf_lock_create (rtems_name name, rtems_bdbuf_lock_type *lock)
{
#if defined(RTEMS_BDBUF_USE_PTHREAD)
  int                 eno;
  pthread_mutexattr_t attr;

  (void) name;

  /* Attribute failures are fatal: no caller can recover from them. */
  eno = pthread_mutexattr_init (&attr);
  if (eno != 0)
    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_MTX_ATTR_INIT);

  eno = pthread_mutexattr_setprotocol (&attr, PTHREAD_PRIO_INHERIT);
  if (eno != 0)
    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_MTX_ATTR_SETPROTO);

  eno = pthread_mutex_init (lock, &attr);

  /* The attribute object is no longer needed regardless of the outcome. */
  pthread_mutexattr_destroy (&attr);

  if (eno != 0)
    return RTEMS_UNSATISFIED;

  return RTEMS_SUCCESSFUL;
#else
  return rtems_semaphore_create(
    name,
    1,
    RTEMS_BDBUF_CACHE_LOCK_ATTRIBS,
    0,
    lock
  );
#endif
}
371
/**
 * Delete a cache lock created by rtems_bdbuf_lock_create().
 *
 * @param lock The lock to delete.
 */
static void
rtems_bdbuf_lock_delete (rtems_bdbuf_lock_type *lock)
{
#if defined(RTEMS_BDBUF_USE_PTHREAD)
  pthread_mutex_destroy (lock);
#else
  rtems_semaphore_delete (*lock);
#endif
}
381
/**
 * Create the synchronization object for a waiter group.  Depending on the
 * build either a POSIX condition variable or a simple binary semaphore is
 * used.
 *
 * @param name The semaphore name (unused for the pthread variant).
 * @param waiter The waiter group to initialise.
 * @retval RTEMS_SUCCESSFUL The waiter object was created.
 * @retval RTEMS_UNSATISFIED The condition variable could not be created.
 * @return Classic variant: the rtems_semaphore_create() status.
 */
static rtems_status_code
rtems_bdbuf_waiter_create (rtems_name name, rtems_bdbuf_waiters *waiter)
{
#if defined(RTEMS_BDBUF_USE_PTHREAD)
  int eno = pthread_cond_init (&waiter->cond_var, NULL);
  if (eno != 0)
    return RTEMS_UNSATISFIED;

  return RTEMS_SUCCESSFUL;
#else
  /* Initial count 0: waiters block until flushed by a wake. */
  return rtems_semaphore_create(
    name,
    0,
    RTEMS_BDBUF_CACHE_WAITER_ATTRIBS,
    0,
    &waiter->sema
  );
#endif
}
401
/**
 * Delete the synchronization object of a waiter group.
 *
 * @param waiter The waiter group to tear down.
 */
static void
rtems_bdbuf_waiter_delete (rtems_bdbuf_waiters *waiter)
{
#if defined(RTEMS_BDBUF_USE_PTHREAD)
  pthread_cond_destroy (&waiter->cond_var);
#else
  rtems_semaphore_delete (waiter->sema);
#endif
}
411
412/**
413 * Searches for the node with specified dd/block.
414 *
415 * @param root pointer to the root node of the AVL-Tree
416 * @param dd disk device search key
417 * @param block block search key
418 * @retval NULL node with the specified dd/block is not found
419 * @return pointer to the node with specified dd/block
420 */
421static rtems_bdbuf_buffer *
422rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
423                        const rtems_disk_device *dd,
424                        rtems_blkdev_bnum    block)
425{
426  rtems_bdbuf_buffer* p = *root;
427
428  while ((p != NULL) && ((p->dd != dd) || (p->block != block)))
429  {
430    if (((uintptr_t) p->dd < (uintptr_t) dd)
431        || ((p->dd == dd) && (p->block < block)))
432    {
433      p = p->avl.right;
434    }
435    else
436    {
437      p = p->avl.left;
438    }
439  }
440
441  return p;
442}
443
444/**
445 * Inserts the specified node to the AVl-Tree.
446 *
447 * @param root pointer to the root node of the AVL-Tree
448 * @param node Pointer to the node to add.
449 * @retval 0 The node added successfully
450 * @retval -1 An error occured
451 */
452static int
453rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
454                       rtems_bdbuf_buffer*  node)
455{
456  const rtems_disk_device *dd = node->dd;
457  rtems_blkdev_bnum block = node->block;
458
459  rtems_bdbuf_buffer*  p = *root;
460  rtems_bdbuf_buffer*  q;
461  rtems_bdbuf_buffer*  p1;
462  rtems_bdbuf_buffer*  p2;
463  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
464  rtems_bdbuf_buffer** buf_prev = buf_stack;
465
466  bool modified = false;
467
468  if (p == NULL)
469  {
470    *root = node;
471    node->avl.left = NULL;
472    node->avl.right = NULL;
473    node->avl.bal = 0;
474    return 0;
475  }
476
477  while (p != NULL)
478  {
479    *buf_prev++ = p;
480
481    if (((uintptr_t) p->dd < (uintptr_t) dd)
482        || ((p->dd == dd) && (p->block < block)))
483    {
484      p->avl.cache = 1;
485      q = p->avl.right;
486      if (q == NULL)
487      {
488        q = node;
489        p->avl.right = q = node;
490        break;
491      }
492    }
493    else if ((p->dd != dd) || (p->block != block))
494    {
495      p->avl.cache = -1;
496      q = p->avl.left;
497      if (q == NULL)
498      {
499        q = node;
500        p->avl.left = q;
501        break;
502      }
503    }
504    else
505    {
506      return -1;
507    }
508
509    p = q;
510  }
511
512  q->avl.left = q->avl.right = NULL;
513  q->avl.bal = 0;
514  modified = true;
515  buf_prev--;
516
517  while (modified)
518  {
519    if (p->avl.cache == -1)
520    {
521      switch (p->avl.bal)
522      {
523        case 1:
524          p->avl.bal = 0;
525          modified = false;
526          break;
527
528        case 0:
529          p->avl.bal = -1;
530          break;
531
532        case -1:
533          p1 = p->avl.left;
534          if (p1->avl.bal == -1) /* simple LL-turn */
535          {
536            p->avl.left = p1->avl.right;
537            p1->avl.right = p;
538            p->avl.bal = 0;
539            p = p1;
540          }
541          else /* double LR-turn */
542          {
543            p2 = p1->avl.right;
544            p1->avl.right = p2->avl.left;
545            p2->avl.left = p1;
546            p->avl.left = p2->avl.right;
547            p2->avl.right = p;
548            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
549            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
550            p = p2;
551          }
552          p->avl.bal = 0;
553          modified = false;
554          break;
555
556        default:
557          break;
558      }
559    }
560    else
561    {
562      switch (p->avl.bal)
563      {
564        case -1:
565          p->avl.bal = 0;
566          modified = false;
567          break;
568
569        case 0:
570          p->avl.bal = 1;
571          break;
572
573        case 1:
574          p1 = p->avl.right;
575          if (p1->avl.bal == 1) /* simple RR-turn */
576          {
577            p->avl.right = p1->avl.left;
578            p1->avl.left = p;
579            p->avl.bal = 0;
580            p = p1;
581          }
582          else /* double RL-turn */
583          {
584            p2 = p1->avl.left;
585            p1->avl.left = p2->avl.right;
586            p2->avl.right = p1;
587            p->avl.right = p2->avl.left;
588            p2->avl.left = p;
589            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
590            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
591            p = p2;
592          }
593          p->avl.bal = 0;
594          modified = false;
595          break;
596
597        default:
598          break;
599      }
600    }
601    q = p;
602    if (buf_prev > buf_stack)
603    {
604      p = *--buf_prev;
605
606      if (p->avl.cache == -1)
607      {
608        p->avl.left = q;
609      }
610      else
611      {
612        p->avl.right = q;
613      }
614    }
615    else
616    {
617      *root = p;
618      break;
619    }
620  };
621
622  return 0;
623}
624
625
/**
 * Removes the node from the tree.
 *
 * @param root Pointer to pointer to the root node
 * @param node Pointer to the node to remove
 * @retval 0 Item removed
 * @retval -1 No such item found
 */
static int
rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
                       const rtems_bdbuf_buffer* node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  r;
  rtems_bdbuf_buffer*  s;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  memset (buf_stack, 0, sizeof(buf_stack));

  /*
   * Descend to the node to remove, recording the path on buf_stack.  The
   * avl.cache field remembers which child was taken (+1 right, -1 left)
   * so the rebalance pass below can re-link rotated subtrees.
   */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      p->avl.cache = 1;
      p = p->avl.right;
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      p->avl.cache = -1;
      p = p->avl.left;
    }
    else
    {
      /* node found */
      break;
    }
  }

  if (p == NULL)
  {
    /* there is no such node */
    return -1;
  }

  q = p;

  /* Pop the found node; p becomes its parent (or NULL for the root). */
  buf_prev--;
  if (buf_prev > buf_stack)
  {
    p = *(buf_prev - 1);
  }
  else
  {
    p = NULL;
  }

  /* at this moment q - is a node to delete, p is q's parent */
  if (q->avl.right == NULL)
  {
    /* No right subtree: splice in the left child directly. */
    r = q->avl.left;
    if (r != NULL)
    {
      r->avl.bal = 0;
    }
    q = r;
  }
  else
  {
    rtems_bdbuf_buffer **t;

    r = q->avl.right;

    if (r->avl.left == NULL)
    {
      /* Right child has no left subtree: it replaces q in place. */
      r->avl.left = q->avl.left;
      r->avl.bal = q->avl.bal;
      r->avl.cache = 1;
      *buf_prev++ = q = r;
    }
    else
    {
      /*
       * General case: find the in-order successor s (leftmost node of the
       * right subtree) and substitute it for q.  The slot t in the path
       * stack is patched once s is known.
       */
      t = buf_prev++;
      s = r;

      while (s->avl.left != NULL)
      {
        *buf_prev++ = r = s;
        s = r->avl.left;
        r->avl.cache = -1;
      }

      s->avl.left = q->avl.left;
      r->avl.left = s->avl.right;
      s->avl.right = q->avl.right;
      s->avl.bal = q->avl.bal;
      s->avl.cache = 1;

      *t = q = s;
    }
  }

  /* Hook the replacement subtree into q's former parent (or the root). */
  if (p != NULL)
  {
    if (p->avl.cache == -1)
    {
      p->avl.left = q;
    }
    else
    {
      p->avl.right = q;
    }
  }
  else
  {
    *root = q;
  }

  modified = true;

  /*
   * Walk back up the recorded path rebalancing.  Unlike insertion, a
   * removal may require rotations at every level; the loop only stops
   * early when a subtree's height is unchanged (modified = false).
   */
  while (modified)
  {
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;
    }
    else
    {
      break;
    }

    if (p->avl.cache == -1)
    {
      /* rebalance left branch */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          break;
        case  0:
          p->avl.bal = 1;
          modified = false;
          break;

        case +1:
          p1 = p->avl.right;

          if (p1->avl.bal >= 0) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;

            if (p1->avl.bal == 0)
            {
              p1->avl.bal = -1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;

            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;

            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }
    else
    {
      /* rebalance right branch */
      switch (p->avl.bal)
      {
        case +1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = -1;
          modified = false;
          break;

        case -1:
          p1 = p->avl.left;

          if (p1->avl.bal <= 0) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            if (p1->avl.bal == 0)
            {
              p1->avl.bal = 1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;

            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;

            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }

    /* Re-link the possibly-rotated subtree into its parent. */
    if (buf_prev > buf_stack)
    {
      q = *(buf_prev - 1);

      if (q->avl.cache == -1)
      {
        q->avl.left = p;
      }
      else
      {
        q->avl.right = p;
      }
    }
    else
    {
      *root = p;
      break;
    }

  }

  return 0;
}
900
901static void
902rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
903{
904  bd->state = state;
905}
906
907static rtems_blkdev_bnum
908rtems_bdbuf_media_block (const rtems_disk_device *dd, rtems_blkdev_bnum block)
909{
910  if (dd->block_to_media_block_shift >= 0)
911    return block << dd->block_to_media_block_shift;
912  else
913    /*
914     * Change the block number for the block size to the block number for the media
915     * block size. We have to use 64bit maths. There is no short cut here.
916     */
917    return (rtems_blkdev_bnum)
918      ((((uint64_t) block) * dd->block_size) / dd->media_block_size);
919}
920
/**
 * Lock the mutex. A single task can nest calls.
 *
 * @param lock The mutex to lock.
 * @param fatal_error_code The error code if the call fails.
 */
static void
rtems_bdbuf_lock (rtems_bdbuf_lock_type *lock, uint32_t fatal_error_code)
{
#if defined(RTEMS_BDBUF_USE_PTHREAD)
  int eno = pthread_mutex_lock (lock);
  if (eno != 0)
    rtems_bdbuf_fatal (fatal_error_code);
#else
  /* Wait forever; any failure here leaves the cache unusable. */
  rtems_status_code sc = rtems_semaphore_obtain (*lock,
                                                 RTEMS_WAIT,
                                                 RTEMS_NO_TIMEOUT);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_bdbuf_fatal (fatal_error_code);
#endif
}
942
/**
 * Unlock the mutex.
 *
 * @param lock The mutex to unlock.
 * @param fatal_error_code The error code if the call fails.
 */
static void
rtems_bdbuf_unlock (rtems_bdbuf_lock_type *lock, uint32_t fatal_error_code)
{
#if defined(RTEMS_BDBUF_USE_PTHREAD)
  int eno = pthread_mutex_unlock (lock);
  if (eno != 0)
    rtems_bdbuf_fatal (fatal_error_code);
#else
  rtems_status_code sc = rtems_semaphore_release (*lock);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_bdbuf_fatal (fatal_error_code);
#endif
}
962
963/**
964 * Lock the cache. A single task can nest calls.
965 */
966static void
967rtems_bdbuf_lock_cache (void)
968{
969  rtems_bdbuf_lock (&bdbuf_cache.lock, RTEMS_BDBUF_FATAL_CACHE_LOCK);
970}
971
972/**
973 * Unlock the cache.
974 */
975static void
976rtems_bdbuf_unlock_cache (void)
977{
978  rtems_bdbuf_unlock (&bdbuf_cache.lock, RTEMS_BDBUF_FATAL_CACHE_UNLOCK);
979}
980
981/**
982 * Lock the cache's sync. A single task can nest calls.
983 */
984static void
985rtems_bdbuf_lock_sync (void)
986{
987  rtems_bdbuf_lock (&bdbuf_cache.sync_lock, RTEMS_BDBUF_FATAL_SYNC_LOCK);
988}
989
990/**
991 * Unlock the cache's sync lock. Any blocked writers are woken.
992 */
993static void
994rtems_bdbuf_unlock_sync (void)
995{
996  rtems_bdbuf_unlock (&bdbuf_cache.sync_lock,
997                      RTEMS_BDBUF_FATAL_SYNC_UNLOCK);
998}
999
1000static void
1001rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
1002{
1003  ++bd->group->users;
1004}
1005
1006static void
1007rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
1008{
1009  --bd->group->users;
1010}
1011
#if !defined(RTEMS_BDBUF_USE_PTHREAD)
/**
 * Disable preemption for the calling task.
 *
 * @return rtems_mode The previous task mode; pass it to
 *                    rtems_bdbuf_restore_preemption().
 */
static rtems_mode
rtems_bdbuf_disable_preemption (void)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  rtems_mode prev_mode = 0;

  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_PREEMPT_DIS);

  return prev_mode;
}

/**
 * Restore the task mode saved by rtems_bdbuf_disable_preemption().
 *
 * @param prev_mode The mode returned by the disable call.
 */
static void
rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;

  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_PREEMPT_RST);
}
#endif
1036
/**
 * Wait until woken. Semaphores are used so a number of tasks can wait and can
 * be woken at once. Task events would require we maintain a list of tasks to
 * be woken and this would require storage and we do not know the number of
 * tasks that could be waiting.
 *
 * While we have the cache locked we can try and claim the semaphore and
 * therefore know when we release the lock to the cache we will block until the
 * semaphore is released. This may even happen before we get to block.
 *
 * A counter is used to save the release call when no one is waiting.
 *
 * The function assumes the cache is locked on entry and it will be locked on
 * exit.
 */
static void
rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
{
  /*
   * Indicate we are waiting.
   */
  ++waiters->count;

#if defined(RTEMS_BDBUF_USE_PTHREAD)
  {
    /* The condition variable atomically releases and re-takes the lock. */
    int eno = pthread_cond_wait (&waiters->cond_var, &bdbuf_cache.lock);
    if (eno != 0)
      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CV_WAIT);
  }
#else
  {
    rtems_status_code sc;
    rtems_mode        prev_mode;

    /*
     * Disable preemption then unlock the cache and block.  There is no POSIX
     * condition variable in the core API so this is a work around.
     *
     * The issue is a task could preempt after the cache is unlocked because it is
     * blocking or just hits that window, and before this task has blocked on the
     * semaphore. If the preempting task flushes the queue this task will not see
     * the flush and may block for ever or until another transaction flushes this
     * semaphore.
     */
    prev_mode = rtems_bdbuf_disable_preemption();

    /*
     * Unlock the cache, wait, and lock the cache when we return.
     */
    rtems_bdbuf_unlock_cache ();

    sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);

    if (sc == RTEMS_TIMEOUT)
      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CACHE_WAIT_TO);

    /*
     * A wake flushes the semaphore, so RTEMS_UNSATISFIED is the expected
     * result here; anything else is an error.
     */
    if (sc != RTEMS_UNSATISFIED)
      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CACHE_WAIT_2);

    rtems_bdbuf_lock_cache ();

    rtems_bdbuf_restore_preemption (prev_mode);
  }
#endif

  --waiters->count;
}
1104
/**
 * Wait on the waiter group for the buffer descriptor.
 *
 * The group user count and the bd waiter count are raised around the wait
 * so the buffer and its group are not recycled while this task is blocked.
 *
 * @param bd The buffer descriptor to wait on.
 * @param waiters The waiter group to block on.
 */
static void
rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
{
  rtems_bdbuf_group_obtain (bd);
  ++bd->waiters;
  rtems_bdbuf_anonymous_wait (waiters);
  --bd->waiters;
  rtems_bdbuf_group_release (bd);
}
1114
/**
 * Wake a blocked resource. The resource has a counter that lets us know if
 * there are any waiters.
 */
static void
rtems_bdbuf_wake (rtems_bdbuf_waiters *waiters)
{
  if (waiters->count > 0)
  {
#if defined(RTEMS_BDBUF_USE_PTHREAD)
    int eno = pthread_cond_broadcast (&waiters->cond_var);
    if (eno != 0)
      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CV_BROADCAST);
#else
    /*
     * The flush releases every waiter at once; they return from their
     * obtain with RTEMS_UNSATISFIED (see rtems_bdbuf_anonymous_wait()).
     */
    rtems_status_code sc = rtems_semaphore_flush (waiters->sema);
    if (sc != RTEMS_SUCCESSFUL)
      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CACHE_WAKE);
#endif
  }
}
1135
1136static void
1137rtems_bdbuf_wake_swapper (void)
1138{
1139  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1140                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1141  if (sc != RTEMS_SUCCESSFUL)
1142    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_1);
1143}
1144
1145static bool
1146rtems_bdbuf_has_buffer_waiters (void)
1147{
1148  return bdbuf_cache.buffer_waiters.count;
1149}
1150
static void
rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
{
  /*
   * Remove the buffer from the AVL lookup tree.  A failure means the buffer
   * was not in the tree, i.e. the cache bookkeeping is corrupt, which is
   * fatal.
   */
  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
    rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_TREE_RM);
}
1157
1158static void
1159rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
1160{
1161  switch (bd->state)
1162  {
1163    case RTEMS_BDBUF_STATE_FREE:
1164      break;
1165    case RTEMS_BDBUF_STATE_CACHED:
1166      rtems_bdbuf_remove_from_tree (bd);
1167      break;
1168    default:
1169      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_10);
1170  }
1171
1172  rtems_chain_extract_unprotected (&bd->link);
1173}
1174
static void
rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  /*
   * Mark the buffer FREE and prepend it to the LRU list so it is the first
   * candidate for reuse.
   */
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
  rtems_chain_prepend_unprotected (&bdbuf_cache.lru, &bd->link);
}
1181
static void
rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
{
  /* Mark the buffer EMPTY: associated with a device and block but holding
   * no valid data. */
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
}
1187
static void
rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  /*
   * Mark the buffer CACHED and append it to the LRU list, making it the
   * least likely candidate for reuse.
   */
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
  rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
}
1194
1195static void
1196rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
1197{
1198  rtems_bdbuf_make_empty (bd);
1199
1200  if (bd->waiters == 0)
1201  {
1202    rtems_bdbuf_remove_from_tree (bd);
1203    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1204  }
1205}
1206
static void
rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
{
  /*
   * If a sync of this device is in progress, stall until it finishes.  The
   * sync holder keeps the sync lock for the duration, so obtaining and
   * immediately releasing it acts as a barrier.  The cache lock must be
   * dropped while blocking on the sync lock to avoid a deadlock with the
   * sync holder.
   */
  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dd)
  {
    rtems_bdbuf_unlock_cache ();

    /*
     * Wait for the sync lock.
     */
    rtems_bdbuf_lock_sync ();

    rtems_bdbuf_unlock_sync ();
    rtems_bdbuf_lock_cache ();
  }

  /*
   * Only the first modified release sets the timer and any further user
   * accesses do not change the timer value which should move down. This
   * assumes the user's hold of the buffer is much less than the time on the
   * modified list. Resetting the timer on each access could result in a
   * buffer never getting to 0 and never being forced onto disk. This raises a
   * difficult question. Is a snapshot of a block that is changing better than
   * nothing being written? We have tended to think we should hold changes for
   * only a specific period of time even if still changing and get onto disk
   * and letting the file system try and recover this position if it can.
   */
  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
    bd->hold_timer = bdbuf_config.swap_block_hold;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
  rtems_chain_append_unprotected (&bdbuf_cache.modified, &bd->link);

  /*
   * Wake any task blocked on this buffer; otherwise, if tasks are starved
   * for buffers, kick the swapper so this modified buffer gets written out
   * and freed.
   */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else if (rtems_bdbuf_has_buffer_waiters ())
    rtems_bdbuf_wake_swapper ();
}
1246
1247static void
1248rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
1249{
1250  rtems_bdbuf_group_release (bd);
1251  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
1252
1253  if (bd->waiters)
1254    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1255  else
1256    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1257}
1258
1259/**
1260 * Compute the number of BDs per group for a given buffer size.
1261 *
1262 * @param size The buffer size. It can be any size and we scale up.
1263 */
1264static size_t
1265rtems_bdbuf_bds_per_group (size_t size)
1266{
1267  size_t bufs_per_size;
1268  size_t bds_per_size;
1269
1270  if (size > bdbuf_config.buffer_max)
1271    return 0;
1272
1273  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1274
1275  for (bds_per_size = 1;
1276       bds_per_size < bufs_per_size;
1277       bds_per_size <<= 1)
1278    ;
1279
1280  return bdbuf_cache.max_bds_per_group / bds_per_size;
1281}
1282
1283static void
1284rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
1285{
1286  rtems_bdbuf_group_release (bd);
1287  rtems_bdbuf_discard_buffer (bd);
1288
1289  if (bd->waiters)
1290    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1291  else
1292    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1293}
1294
1295/**
1296 * Reallocate a group. The BDs currently allocated in the group are removed
1297 * from the ALV tree and any lists then the new BD's are prepended to the ready
1298 * list of the cache.
1299 *
1300 * @param group The group to reallocate.
1301 * @param new_bds_per_group The new count of BDs per group.
1302 * @return A buffer of this group.
1303 */
1304static rtems_bdbuf_buffer *
1305rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1306{
1307  rtems_bdbuf_buffer* bd;
1308  size_t              b;
1309  size_t              bufs_per_bd;
1310
1311  if (rtems_bdbuf_tracer)
1312    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1313            group - bdbuf_cache.groups, group->bds_per_group,
1314            new_bds_per_group);
1315
1316  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1317
1318  for (b = 0, bd = group->bdbuf;
1319       b < group->bds_per_group;
1320       b++, bd += bufs_per_bd)
1321    rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1322
1323  group->bds_per_group = new_bds_per_group;
1324  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1325
1326  for (b = 1, bd = group->bdbuf + bufs_per_bd;
1327       b < group->bds_per_group;
1328       b++, bd += bufs_per_bd)
1329    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1330
1331  if (b > 1)
1332    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1333
1334  return group->bdbuf;
1335}
1336
1337static void
1338rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
1339                                rtems_disk_device  *dd,
1340                                rtems_blkdev_bnum   block)
1341{
1342  bd->dd        = dd ;
1343  bd->block     = block;
1344  bd->avl.left  = NULL;
1345  bd->avl.right = NULL;
1346  bd->waiters   = 0;
1347
1348  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1349    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RECYCLE);
1350
1351  rtems_bdbuf_make_empty (bd);
1352}
1353
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_from_lru_list (rtems_disk_device *dd,
                                      rtems_blkdev_bnum  block)
{
  /*
   * Scan the LRU list, oldest buffer first, for one that can be recycled for
   * the given device and block.  Returns the recycled buffer set up EMPTY,
   * or NULL if no buffer could be recycled.  Called with the cache locked.
   */
  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);

  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
  {
    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
    rtems_bdbuf_buffer *empty_bd = NULL;

    if (rtems_bdbuf_tracer)
      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
              bd - bdbuf_cache.bds,
              bd->group - bdbuf_cache.groups, bd->group->users,
              bd->group->bds_per_group, dd->bds_per_group);

    /*
     * If nobody waits for this BD, we may recycle it.
     */
    if (bd->waiters == 0)
    {
      if (bd->group->bds_per_group == dd->bds_per_group)
      {
        /* The group already matches the device's buffer size; take the BD. */
        rtems_bdbuf_remove_from_tree_and_lru_list (bd);

        empty_bd = bd;
      }
      else if (bd->group->users == 0)
        /* Wrong size but the whole group is idle, so reshape it. */
        empty_bd = rtems_bdbuf_group_realloc (bd->group, dd->bds_per_group);
    }

    if (empty_bd != NULL)
    {
      rtems_bdbuf_setup_empty_buffer (empty_bd, dd, block);

      return empty_bd;
    }

    node = rtems_chain_next (node);
  }

  return NULL;
}
1398
1399static rtems_status_code
1400rtems_bdbuf_create_task(
1401  rtems_name name,
1402  rtems_task_priority priority,
1403  rtems_task_priority default_priority,
1404  rtems_id *id
1405)
1406{
1407  rtems_status_code sc;
1408  size_t stack_size = bdbuf_config.task_stack_size ?
1409    bdbuf_config.task_stack_size : RTEMS_BDBUF_TASK_STACK_SIZE_DEFAULT;
1410
1411  priority = priority != 0 ? priority : default_priority;
1412
1413  sc = rtems_task_create (name,
1414                          priority,
1415                          stack_size,
1416                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1417                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1418                          id);
1419
1420  return sc;
1421}
1422
1423static rtems_bdbuf_swapout_transfer*
1424rtems_bdbuf_swapout_transfer_alloc (void)
1425{
1426  /*
1427   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
1428   * I am disappointment at finding code like this in RTEMS. The request should
1429   * have been a rtems_chain_control. Simple, fast and less storage as the node
1430   * is already part of the buffer structure.
1431   */
1432  size_t transfer_size = sizeof (rtems_bdbuf_swapout_transfer)
1433    + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
1434  return calloc (1, transfer_size);
1435}
1436
1437static void
1438rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status);
1439
1440static void
1441rtems_bdbuf_swapout_transfer_init (rtems_bdbuf_swapout_transfer* transfer,
1442                                   rtems_id id)
1443{
1444  rtems_chain_initialize_empty (&transfer->bds);
1445  transfer->dd = BDBUF_INVALID_DEV;
1446  transfer->syncing = false;
1447  transfer->write_req.req = RTEMS_BLKDEV_REQ_WRITE;
1448  transfer->write_req.done = rtems_bdbuf_transfer_done;
1449  transfer->write_req.io_task = id;
1450}
1451
1452static size_t
1453rtems_bdbuf_swapout_worker_size (void)
1454{
1455  return sizeof (rtems_bdbuf_swapout_worker)
1456    + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
1457}
1458
1459static rtems_task
1460rtems_bdbuf_swapout_worker_task (rtems_task_argument arg);
1461
static rtems_status_code
rtems_bdbuf_swapout_workers_create (void)
{
  /*
   * Allocate one contiguous, zero-initialised slab for all swapout workers,
   * then create and start each worker task.  Stops at the first failure; the
   * caller's error path deletes any tasks already created (zeroed ids mark
   * workers that never got a task).
   */
  rtems_status_code  sc;
  size_t             w;
  size_t             worker_size;
  char              *worker_current;

  worker_size = rtems_bdbuf_swapout_worker_size ();
  worker_current = calloc (1, bdbuf_config.swapout_workers * worker_size);
  sc = worker_current != NULL ? RTEMS_SUCCESSFUL : RTEMS_NO_MEMORY;

  bdbuf_cache.swapout_workers = (rtems_bdbuf_swapout_worker *) worker_current;

  for (w = 0;
       sc == RTEMS_SUCCESSFUL && w < bdbuf_config.swapout_workers;
       w++, worker_current += worker_size)
  {
    rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;

    /* Names are 'BDoa', 'BDob', ... so each worker is identifiable. */
    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'D', 'o', 'a' + w),
                                  bdbuf_config.swapout_worker_priority,
                                  RTEMS_BDBUF_SWAPOUT_WORKER_TASK_PRIORITY_DEFAULT,
                                  &worker->id);
    if (sc == RTEMS_SUCCESSFUL)
    {
      rtems_bdbuf_swapout_transfer_init (&worker->transfer, worker->id);

      /* Make the worker available to the swapout task before starting it. */
      rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);
      worker->enabled = true;

      sc = rtems_task_start (worker->id,
                             rtems_bdbuf_swapout_worker_task,
                             (rtems_task_argument) worker);
    }
  }

  return sc;
}
1501
1502static size_t
1503rtems_bdbuf_read_request_size (uint32_t transfer_count)
1504{
1505  return sizeof (rtems_blkdev_request)
1506    + sizeof (rtems_blkdev_sg_buffer) * transfer_count;
1507}
1508
static rtems_status_code
rtems_bdbuf_do_init (void)
{
  /*
   * One-time cache initialisation: validate the configuration, create the
   * locks and waiter groups, allocate the buffer descriptors, groups and
   * cache-aligned buffer memory, then create and start the swapout, swapout
   * worker and read-ahead tasks.  On any failure the error path tears down
   * everything created so far and returns RTEMS_UNSATISFIED.
   */
  rtems_bdbuf_group*  group;
  rtems_bdbuf_buffer* bd;
  uint8_t*            buffer;
  size_t              b;
  rtems_status_code   sc;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:init\n");

  if (rtems_interrupt_is_in_progress())
    return RTEMS_CALLED_FROM_ISR;

  /*
   * Check the configuration table values.
   */

  /* The maximum buffer size must be a multiple of the minimum. */
  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
    return RTEMS_INVALID_NUMBER;

  /*
   * The read-ahead request lives on the read-ahead task stack; refuse a
   * configuration whose request would eat too much of a minimum stack.
   */
  if (rtems_bdbuf_read_request_size (bdbuf_config.max_read_ahead_blocks)
      > RTEMS_MINIMUM_STACK_SIZE / 8U)
    return RTEMS_INVALID_NUMBER;

  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;

  rtems_chain_initialize_empty (&bdbuf_cache.swapout_free_workers);
  rtems_chain_initialize_empty (&bdbuf_cache.lru);
  rtems_chain_initialize_empty (&bdbuf_cache.modified);
  rtems_chain_initialize_empty (&bdbuf_cache.sync);
  rtems_chain_initialize_empty (&bdbuf_cache.read_ahead_chain);

  /*
   * Create the locks for the cache.
   */

  sc = rtems_bdbuf_lock_create (rtems_build_name ('B', 'D', 'C', 'l'),
                                &bdbuf_cache.lock);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  /* Held from here on so the error path can safely unlock it. */
  rtems_bdbuf_lock_cache ();

  sc = rtems_bdbuf_lock_create (rtems_build_name ('B', 'D', 'C', 's'),
                                &bdbuf_cache.sync_lock);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_bdbuf_waiter_create (rtems_build_name ('B', 'D', 'C', 'a'),
                                  &bdbuf_cache.access_waiters);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_bdbuf_waiter_create (rtems_build_name ('B', 'D', 'C', 't'),
                                  &bdbuf_cache.transfer_waiters);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_bdbuf_waiter_create (rtems_build_name ('B', 'D', 'C', 'b'),
                                  &bdbuf_cache.buffer_waiters);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  /*
   * Compute the various number of elements in the cache.
   */
  bdbuf_cache.buffer_min_count =
    bdbuf_config.size / bdbuf_config.buffer_min;
  bdbuf_cache.max_bds_per_group =
    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
  bdbuf_cache.group_count =
    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;

  /*
   * Allocate the memory for the buffer descriptors.
   */
  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
                            bdbuf_cache.buffer_min_count);
  if (!bdbuf_cache.bds)
    goto error;

  /*
   * Allocate the memory for the groups.
   */
  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
                               bdbuf_cache.group_count);
  if (!bdbuf_cache.groups)
    goto error;

  /*
   * Allocate memory for buffer memory. The buffer memory will be cache
   * aligned. It is possible to free the memory allocated by rtems_memalign()
   * with free(). Return 0 if allocated.
   */
  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
                      rtems_cache_get_data_line_size(),
                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
    goto error;

  /*
   * The cache is empty after opening so we need to add all the buffers to it
   * and initialise the groups.
   */
  for (b = 0, group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
       b < bdbuf_cache.buffer_min_count;
       b++, bd++, buffer += bdbuf_config.buffer_min)
  {
    bd->dd    = BDBUF_INVALID_DEV;
    bd->group  = group;
    bd->buffer = buffer;

    rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);

    /* Advance to the next group after filling max_bds_per_group BDs. */
    if ((b % bdbuf_cache.max_bds_per_group) ==
        (bdbuf_cache.max_bds_per_group - 1))
      group++;
  }

  /* Point each group at its first BD and give it the maximum BD count. */
  for (b = 0,
         group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds;
       b < bdbuf_cache.group_count;
       b++,
         group++,
         bd += bdbuf_cache.max_bds_per_group)
  {
    group->bds_per_group = bdbuf_cache.max_bds_per_group;
    group->bdbuf = bd;
  }

  /*
   * Create and start swapout task.
   */

  bdbuf_cache.swapout_transfer = rtems_bdbuf_swapout_transfer_alloc ();
  if (!bdbuf_cache.swapout_transfer)
    goto error;

  bdbuf_cache.swapout_enabled = true;

  sc = rtems_bdbuf_create_task (rtems_build_name('B', 'S', 'W', 'P'),
                                bdbuf_config.swapout_priority,
                                RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
                                &bdbuf_cache.swapout);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  rtems_bdbuf_swapout_transfer_init (bdbuf_cache.swapout_transfer, bdbuf_cache.swapout);

  sc = rtems_task_start (bdbuf_cache.swapout,
                         rtems_bdbuf_swapout_task,
                         (rtems_task_argument) bdbuf_cache.swapout_transfer);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  /* Optional swapout worker tasks. */
  if (bdbuf_config.swapout_workers > 0)
  {
    sc = rtems_bdbuf_swapout_workers_create ();
    if (sc != RTEMS_SUCCESSFUL)
      goto error;
  }

  /* Optional read-ahead task. */
  if (bdbuf_config.max_read_ahead_blocks > 0)
  {
    bdbuf_cache.read_ahead_enabled = true;
    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'R', 'D', 'A'),
                                  bdbuf_config.read_ahead_priority,
                                  RTEMS_BDBUF_READ_AHEAD_TASK_PRIORITY_DEFAULT,
                                  &bdbuf_cache.read_ahead_task);
    if (sc != RTEMS_SUCCESSFUL)
      goto error;

    sc = rtems_task_start (bdbuf_cache.read_ahead_task,
                           rtems_bdbuf_read_ahead_task,
                           0);
    if (sc != RTEMS_SUCCESSFUL)
      goto error;
  }

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;

error:

  /*
   * Tear down in reverse order of creation.  bdbuf_cache is in static
   * storage, so ids and pointers of objects never created are zero/NULL;
   * free(NULL) is a no-op.
   */
  if (bdbuf_cache.read_ahead_task != 0)
    rtems_task_delete (bdbuf_cache.read_ahead_task);

  if (bdbuf_cache.swapout != 0)
    rtems_task_delete (bdbuf_cache.swapout);

  if (bdbuf_cache.swapout_workers)
  {
    char   *worker_current = (char *) bdbuf_cache.swapout_workers;
    size_t  worker_size = rtems_bdbuf_swapout_worker_size ();
    size_t  w;

    for (w = 0;
         w < bdbuf_config.swapout_workers;
         w++, worker_current += worker_size)
    {
      rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;

      /* A zero id marks a worker slot whose task was never created. */
      if (worker->id != 0) {
        rtems_task_delete (worker->id);
      }
    }
  }

  free (bdbuf_cache.buffers);
  free (bdbuf_cache.groups);
  free (bdbuf_cache.bds);
  free (bdbuf_cache.swapout_transfer);
  free (bdbuf_cache.swapout_workers);

  rtems_bdbuf_waiter_delete (&bdbuf_cache.buffer_waiters);
  rtems_bdbuf_waiter_delete (&bdbuf_cache.access_waiters);
  rtems_bdbuf_waiter_delete (&bdbuf_cache.transfer_waiters);
  rtems_bdbuf_lock_delete (&bdbuf_cache.sync_lock);

  /* A non-zero cache lock id means it was created and is currently held. */
  if (bdbuf_cache.lock != 0)
  {
    rtems_bdbuf_unlock_cache ();
    rtems_bdbuf_lock_delete (&bdbuf_cache.lock);
  }

  return RTEMS_UNSATISFIED;
}
1742
static void
rtems_bdbuf_init_once (void)
{
  /* pthread_once() callback: run the real initialisation exactly once and
   * record the result for every caller of rtems_bdbuf_init(). */
  bdbuf_cache.init_status = rtems_bdbuf_do_init();
}
1748
rtems_status_code
rtems_bdbuf_init (void)
{
  /*
   * Thread-safe public entry point: the first caller performs the
   * initialisation via pthread_once(); every caller (including later ones)
   * gets the recorded initialisation status.
   */
  int eno = pthread_once (&rtems_bdbuf_once_state, rtems_bdbuf_init_once);

  if (eno != 0)
    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_ONCE);

  return bdbuf_cache.init_status;
}
1759
1760static void
1761rtems_bdbuf_wait_for_event (rtems_event_set event)
1762{
1763  rtems_status_code sc = RTEMS_SUCCESSFUL;
1764  rtems_event_set   out = 0;
1765
1766  sc = rtems_event_receive (event,
1767                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1768                            RTEMS_NO_TIMEOUT,
1769                            &out);
1770
1771  if (sc != RTEMS_SUCCESSFUL || out != event)
1772    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_EVNT);
1773}
1774
1775static void
1776rtems_bdbuf_wait_for_transient_event (void)
1777{
1778  rtems_status_code sc = RTEMS_SUCCESSFUL;
1779
1780  sc = rtems_event_transient_receive (RTEMS_WAIT, RTEMS_NO_TIMEOUT);
1781  if (sc != RTEMS_SUCCESSFUL)
1782    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT);
1783}
1784
static void
rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
{
  /*
   * Loop until the buffer reaches a state in which the caller may access it.
   * While it is being accessed by another task or is part of a transfer we
   * sleep on the corresponding waiter group; each wait drops and re-acquires
   * the cache lock, so the state is re-examined after every wake-up.
   */
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* Leaving the modified list releases the group user reference. */
        rtems_bdbuf_group_release (bd);
        /* Fall through */
      case RTEMS_BDBUF_STATE_CACHED:
        /* Take the buffer off the LRU/modified list; it is now in use. */
        rtems_chain_extract_unprotected (&bd->link);
        /* Fall through */
      case RTEMS_BDBUF_STATE_EMPTY:
        return;
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        /* Another task holds the buffer; wait for it to finish. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        /* A device transfer is in progress; wait for its completion. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_7);
    }
  }
}
1816
static void
rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
{
  /*
   * Move a modified buffer from the modified list to the sync list and wake
   * the swapout task so it is written to disk promptly.
   */
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
  rtems_chain_extract_unprotected (&bd->link);
  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
  rtems_bdbuf_wake_swapper ();
}
1825
1826/**
1827 * @brief Waits until the buffer is ready for recycling.
1828 *
1829 * @retval @c true Buffer is valid and may be recycled.
1830 * @retval @c false Buffer is invalid and has to searched again.
1831 */
1832static bool
1833rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
1834{
1835  while (true)
1836  {
1837    switch (bd->state)
1838    {
1839      case RTEMS_BDBUF_STATE_FREE:
1840        return true;
1841      case RTEMS_BDBUF_STATE_MODIFIED:
1842        rtems_bdbuf_request_sync_for_modified_buffer (bd);
1843        break;
1844      case RTEMS_BDBUF_STATE_CACHED:
1845      case RTEMS_BDBUF_STATE_EMPTY:
1846        if (bd->waiters == 0)
1847          return true;
1848        else
1849        {
1850          /*
1851           * It is essential that we wait here without a special wait count and
1852           * without the group in use.  Otherwise we could trigger a wait ping
1853           * pong with another recycle waiter.  The state of the buffer is
1854           * arbitrary afterwards.
1855           */
1856          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1857          return false;
1858        }
1859      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1860      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1861      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1862      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1863        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1864        break;
1865      case RTEMS_BDBUF_STATE_SYNC:
1866      case RTEMS_BDBUF_STATE_TRANSFER:
1867      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1868        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1869        break;
1870      default:
1871        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_8);
1872    }
1873  }
1874}
1875
static void
rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
{
  /*
   * Block until the buffer leaves the sync/transfer states.  Any settled
   * state (cached, empty, modified, or in access by another task) ends the
   * wait; unknown states are fatal.
   */
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        return;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_9);
    }
  }
}
1901
static void
rtems_bdbuf_wait_for_buffer (void)
{
  /*
   * No buffer could be recycled.  If modified buffers exist, kick the
   * swapper so some get written out and freed, then wait until a buffer
   * becomes available.
   */
  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
    rtems_bdbuf_wake_swapper ();

  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
}
1910
static void
rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
{
  /*
   * Queue the buffer for synchronous write-out and block until the swapout
   * task has transferred it to the device.
   */
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);

  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);

  /* The access is over; release any tasks blocked on this buffer. */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_wait_for_sync_done (bd);

  /*
   * We may have created a cached or empty buffer which may be recycled.
   */
  if (bd->waiters == 0
        && (bd->state == RTEMS_BDBUF_STATE_CACHED
          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
  {
    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
    {
      /* An empty buffer holds no data; return it to the free pool now. */
      rtems_bdbuf_remove_from_tree (bd);
      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
    }
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
  }
}
1939
1940static rtems_bdbuf_buffer *
1941rtems_bdbuf_get_buffer_for_read_ahead (rtems_disk_device *dd,
1942                                       rtems_blkdev_bnum  block)
1943{
1944  rtems_bdbuf_buffer *bd = NULL;
1945
1946  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1947
1948  if (bd == NULL)
1949  {
1950    bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);
1951
1952    if (bd != NULL)
1953      rtems_bdbuf_group_obtain (bd);
1954  }
1955  else
1956    /*
1957     * The buffer is in the cache.  So it is already available or in use, and
1958     * thus no need for a read ahead.
1959     */
1960    bd = NULL;
1961
1962  return bd;
1963}
1964
static rtems_bdbuf_buffer *
rtems_bdbuf_get_buffer_for_access (rtems_disk_device *dd,
                                   rtems_blkdev_bnum  block)
{
  /*
   * Obtain a buffer for the given device and block, retrying until one is
   * available.  A cache hit whose group size does not match the device's
   * current block size is recycled and the search restarted; a cache miss
   * tries the LRU list and, failing that, waits for a buffer to be freed.
   * Returns with the access prepared and a group user reference held.
   */
  rtems_bdbuf_buffer *bd = NULL;

  do
  {
    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);

    if (bd != NULL)
    {
      if (bd->group->bds_per_group != dd->bds_per_group)
      {
        /* Stale group geometry: free the buffer and search again.  The wait
         * may fail, in which case another task already dealt with it. */
        if (rtems_bdbuf_wait_for_recycle (bd))
        {
          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
        }
        bd = NULL;
      }
    }
    else
    {
      bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);

      if (bd == NULL)
        rtems_bdbuf_wait_for_buffer ();
    }
  }
  while (bd == NULL);

  rtems_bdbuf_wait_for_access (bd);
  rtems_bdbuf_group_obtain (bd);

  return bd;
}
2003
2004static rtems_status_code
2005rtems_bdbuf_get_media_block (const rtems_disk_device *dd,
2006                             rtems_blkdev_bnum        block,
2007                             rtems_blkdev_bnum       *media_block_ptr)
2008{
2009  rtems_status_code sc = RTEMS_SUCCESSFUL;
2010
2011  if (block < dd->block_count)
2012  {
2013    /*
2014     * Compute the media block number. Drivers work with media block number not
2015     * the block number a BD may have as this depends on the block size set by
2016     * the user.
2017     */
2018    *media_block_ptr = rtems_bdbuf_media_block (dd, block) + dd->start;
2019  }
2020  else
2021  {
2022    sc = RTEMS_INVALID_ID;
2023  }
2024
2025  return sc;
2026}
2027
rtems_status_code
rtems_bdbuf_get (rtems_disk_device   *dd,
                 rtems_blkdev_bnum    block,
                 rtems_bdbuf_buffer **bd_ptr)
{
  /*
   * Get a buffer for the given block without reading it from the device.
   * On success *bd_ptr references a buffer in one of the ACCESS states; on
   * failure (invalid block number) *bd_ptr is set to NULL.
   */
  rtems_status_code   sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block;

  rtems_bdbuf_lock_cache ();

  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    /*
     * Print the block index relative to the physical disk.
     */
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);

    /* Promote the settled state to the matching ACCESS state. */
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /*
         * To get a modified buffer could be considered a bug in the caller
         * because you should not be getting an already modified buffer but
         * user may have modified a byte in a block then decided to seek the
         * start and write the whole block and the file system will have no
         * record of this so just gets the block to fill.
         */
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_2);
        break;
    }

    if (rtems_bdbuf_tracer)
    {
      rtems_bdbuf_show_users ("get", bd);
      rtems_bdbuf_show_usage ();
    }
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
2087
2088/**
2089 * Call back handler called by the low level driver when the transfer has
2090 * completed. This function may be invoked from interrupt handler.
2091 *
2092 * @param arg Arbitrary argument specified in block device request
2093 *            structure (in this case - pointer to the appropriate
2094 *            block device request structure).
2095 * @param status I/O completion status
2096 */
2097static void
2098rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status)
2099{
2100  req->status = status;
2101
2102  rtems_event_transient_send (req->io_task);
2103}
2104
static rtems_status_code
rtems_bdbuf_execute_transfer_request (rtems_disk_device    *dd,
                                      rtems_blkdev_request *req,
                                      bool                  cache_locked)
{
  /*
   * Submit a read or write request to the driver, wait for completion, then
   * update the device statistics and the state of every buffer involved.
   * The cache must not be locked while the driver and the wait run; on
   * return the cache lock state matches `cache_locked`.
   */
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  uint32_t transfer_index = 0;
  bool wake_transfer_waiters = false;
  bool wake_buffer_waiters = false;

  if (cache_locked)
    rtems_bdbuf_unlock_cache ();

  /* The return value will be ignored for transfer requests */
  dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);

  /* Wait for transfer request completion */
  rtems_bdbuf_wait_for_transient_event ();
  sc = req->status;

  rtems_bdbuf_lock_cache ();

  /* Statistics */
  if (req->req == RTEMS_BLKDEV_REQ_READ)
  {
    dd->stats.read_blocks += req->bufnum;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.read_errors;
  }
  else
  {
    dd->stats.write_blocks += req->bufnum;
    ++dd->stats.write_transfers;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.write_errors;
  }

  /*
   * Release every buffer of the request: successful transfers become
   * CACHED, failures are discarded.  Remember which waiter groups need a
   * wake-up and signal them once after the loop.
   */
  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
  {
    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
    bool waiters = bd->waiters;

    if (waiters)
      wake_transfer_waiters = true;
    else
      wake_buffer_waiters = true;

    rtems_bdbuf_group_release (bd);

    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
    else
      rtems_bdbuf_discard_buffer (bd);

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("transfer", bd);
  }

  if (wake_transfer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);

  if (wake_buffer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  if (!cache_locked)
    rtems_bdbuf_unlock_cache ();

  /* RTEMS_UNSATISFIED is passed through; any other failure becomes a
   * generic I/O error. */
  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
    return sc;
  else
    return RTEMS_IO_ERROR;
}
2177
/**
 * Execute a read request for the buffer @a bd and, for read-ahead, up to
 * transfer_count - 1 consecutive following blocks. Additional buffers are
 * only added while rtems_bdbuf_get_buffer_for_read_ahead() yields one for
 * the next media block. Must be called with the cache locked; the cache is
 * temporarily unlocked during the actual transfer.
 *
 * @param dd The disk device to read from.
 * @param bd The buffer for the first block; it is put into the TRANSFER
 *           state.
 * @param transfer_count Maximum number of blocks to read in this request.
 */
static rtems_status_code
rtems_bdbuf_execute_read_request (rtems_disk_device  *dd,
                                  rtems_bdbuf_buffer *bd,
                                  uint32_t            transfer_count)
{
  rtems_blkdev_request *req = NULL;
  rtems_blkdev_bnum media_block = bd->block;
  uint32_t media_blocks_per_block = dd->media_blocks_per_block;
  uint32_t block_size = dd->block_size;
  uint32_t transfer_index = 1;

  /*
   * TODO: This type of request structure is wrong and should be removed.
   * The request lives on the stack for the duration of this call only.
   */
#define bdbuf_alloc(size) __builtin_alloca (size)

  req = bdbuf_alloc (rtems_bdbuf_read_request_size (transfer_count));

  req->req = RTEMS_BLKDEV_REQ_READ;
  req->done = rtems_bdbuf_transfer_done;
  req->io_task = rtems_task_self ();
  req->bufnum = 0;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

  req->bufs [0].user   = bd;
  req->bufs [0].block  = media_block;
  req->bufs [0].length = block_size;
  req->bufs [0].buffer = bd->buffer;

  if (rtems_bdbuf_tracer)
    rtems_bdbuf_show_users ("read", bd);

  /* Append read-ahead buffers for consecutive media blocks while available */
  while (transfer_index < transfer_count)
  {
    media_block += media_blocks_per_block;

    bd = rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

    if (bd == NULL)
      break;

    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);

    req->bufs [transfer_index].user   = bd;
    req->bufs [transfer_index].block  = media_block;
    req->bufs [transfer_index].length = block_size;
    req->bufs [transfer_index].buffer = bd->buffer;

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("read", bd);

    ++transfer_index;
  }

  req->bufnum = transfer_index;

  return rtems_bdbuf_execute_transfer_request (dd, req, true);
}
2237
2238static bool
2239rtems_bdbuf_is_read_ahead_active (const rtems_disk_device *dd)
2240{
2241  return !rtems_chain_is_node_off_chain (&dd->read_ahead.node);
2242}
2243
2244static void
2245rtems_bdbuf_read_ahead_cancel (rtems_disk_device *dd)
2246{
2247  if (rtems_bdbuf_is_read_ahead_active (dd))
2248  {
2249    rtems_chain_extract_unprotected (&dd->read_ahead.node);
2250    rtems_chain_set_off_chain (&dd->read_ahead.node);
2251  }
2252}
2253
/**
 * Cancel any pending read-ahead for the disk and clear its trigger so that
 * no new read-ahead is started until a trigger block is set again.
 */
static void
rtems_bdbuf_read_ahead_reset (rtems_disk_device *dd)
{
  rtems_bdbuf_read_ahead_cancel (dd);
  dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
}
2260
/**
 * If the accessed block matches the disk's read-ahead trigger, queue the
 * disk on the read-ahead chain for the read-ahead task. The task is only
 * woken when the chain was empty; a non-empty chain means the task has not
 * yet drained it. Nothing is done if no read-ahead task exists or a
 * read-ahead is already pending for this disk.
 */
static void
rtems_bdbuf_check_read_ahead_trigger (rtems_disk_device *dd,
                                      rtems_blkdev_bnum  block)
{
  if (bdbuf_cache.read_ahead_task != 0
      && dd->read_ahead.trigger == block
      && !rtems_bdbuf_is_read_ahead_active (dd))
  {
    rtems_status_code sc;
    rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;

    if (rtems_chain_is_empty (chain))
    {
      sc = rtems_event_send (bdbuf_cache.read_ahead_task,
                             RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
      if (sc != RTEMS_SUCCESSFUL)
        rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RA_WAKE_UP);
    }

    rtems_chain_append_unprotected (chain, &dd->read_ahead.node);
  }
}
2283
/**
 * Arm the read-ahead trigger after an access to @a block. Unless the block
 * already is the trigger, any pending read-ahead is cancelled, the next
 * sequential block (block + 1) becomes the new trigger and a triggered
 * read-ahead will start at block + 2.
 */
static void
rtems_bdbuf_set_read_ahead_trigger (rtems_disk_device *dd,
                                    rtems_blkdev_bnum  block)
{
  if (dd->read_ahead.trigger != block)
  {
    rtems_bdbuf_read_ahead_cancel (dd);
    dd->read_ahead.trigger = block + 1;
    dd->read_ahead.next = block + 2;
  }
}
2295
/**
 * Get the buffer for the block and ensure it holds valid data: cached and
 * modified buffers are handed out directly as a read hit, an empty buffer
 * triggers a synchronous read (with read-ahead candidates) from the media.
 * The access also arms and checks the sequential read-ahead trigger.
 *
 * @param dd The disk device.
 * @param block The logical block to read.
 * @param bd_ptr Receives the buffer on success, or NULL on error.
 */
rtems_status_code
rtems_bdbuf_read (rtems_disk_device   *dd,
                  rtems_blkdev_bnum    block,
                  rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code     sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer   *bd = NULL;
  rtems_blkdev_bnum     media_block;

  rtems_bdbuf_lock_cache ();

  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        /* Read miss: fetch the block from the media synchronously */
        ++dd->stats.read_misses;
        rtems_bdbuf_set_read_ahead_trigger (dd, block);
        sc = rtems_bdbuf_execute_read_request (dd, bd, 1);
        if (sc == RTEMS_SUCCESSFUL)
        {
          /*
           * The completed transfer put the buffer on the LRU list and
           * released its group; take both back for the caller.
           */
          rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
          rtems_chain_extract_unprotected (&bd->link);
          rtems_bdbuf_group_obtain (bd);
        }
        else
        {
          /* The buffer was discarded by the failed transfer */
          bd = NULL;
        }
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_4);
        break;
    }

    rtems_bdbuf_check_read_ahead_trigger (dd, block);
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
2354
2355static rtems_status_code
2356rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
2357{
2358  if (bd == NULL)
2359    return RTEMS_INVALID_ADDRESS;
2360  if (rtems_bdbuf_tracer)
2361  {
2362    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
2363    rtems_bdbuf_show_users (kind, bd);
2364  }
2365  rtems_bdbuf_lock_cache();
2366
2367  return RTEMS_SUCCESSFUL;
2368}
2369
2370rtems_status_code
2371rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2372{
2373  rtems_status_code sc = RTEMS_SUCCESSFUL;
2374
2375  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2376  if (sc != RTEMS_SUCCESSFUL)
2377    return sc;
2378
2379  switch (bd->state)
2380  {
2381    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2382      rtems_bdbuf_add_to_lru_list_after_access (bd);
2383      break;
2384    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2385    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2386      rtems_bdbuf_discard_buffer_after_access (bd);
2387      break;
2388    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2389      rtems_bdbuf_add_to_modified_list_after_access (bd);
2390      break;
2391    default:
2392      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_0);
2393      break;
2394  }
2395
2396  if (rtems_bdbuf_tracer)
2397    rtems_bdbuf_show_usage ();
2398
2399  rtems_bdbuf_unlock_cache ();
2400
2401  return RTEMS_SUCCESSFUL;
2402}
2403
2404rtems_status_code
2405rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2406{
2407  rtems_status_code sc = RTEMS_SUCCESSFUL;
2408
2409  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2410  if (sc != RTEMS_SUCCESSFUL)
2411    return sc;
2412
2413  switch (bd->state)
2414  {
2415    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2416    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2417    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2418      rtems_bdbuf_add_to_modified_list_after_access (bd);
2419      break;
2420    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2421      rtems_bdbuf_discard_buffer_after_access (bd);
2422      break;
2423    default:
2424      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_6);
2425      break;
2426  }
2427
2428  if (rtems_bdbuf_tracer)
2429    rtems_bdbuf_show_usage ();
2430
2431  rtems_bdbuf_unlock_cache ();
2432
2433  return RTEMS_SUCCESSFUL;
2434}
2435
2436rtems_status_code
2437rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
2438{
2439  rtems_status_code sc = RTEMS_SUCCESSFUL;
2440
2441  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2442  if (sc != RTEMS_SUCCESSFUL)
2443    return sc;
2444
2445  switch (bd->state)
2446  {
2447    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2448    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2449    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2450      rtems_bdbuf_sync_after_access (bd);
2451      break;
2452    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2453      rtems_bdbuf_discard_buffer_after_access (bd);
2454      break;
2455    default:
2456      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_5);
2457      break;
2458  }
2459
2460  if (rtems_bdbuf_tracer)
2461    rtems_bdbuf_show_usage ();
2462
2463  rtems_bdbuf_unlock_cache ();
2464
2465  return RTEMS_SUCCESSFUL;
2466}
2467
/**
 * Synchronize all modified buffers of a disk device with the media. The
 * calling task blocks until the swapout task signals completion.
 *
 * @param dd The disk device to synchronize.
 *
 * @retval RTEMS_SUCCESSFUL Always; individual transfer errors are not
 *                          reported here.
 */
rtems_status_code
rtems_bdbuf_syncdev (rtems_disk_device *dd)
{
  if (rtems_bdbuf_tracer)
    printf ("bdbuf:syncdev: %08x\n", (unsigned) dd->dev);

  /*
   * Take the sync lock before locking the cache. Once we have the sync lock we
   * can lock the cache. If another thread has the sync lock it will cause this
   * thread to block until it owns the sync lock then it can own the cache. The
   * sync lock can only be obtained with the cache unlocked.
   */
  rtems_bdbuf_lock_sync ();
  rtems_bdbuf_lock_cache ();

  /*
   * Set the cache to have a sync active for a specific device and let the swap
   * out task know the id of the requester to wake when done.
   *
   * The swap out task will negate the sync active flag when no more buffers
   * for the device are held on the "modified for sync" queues.
   */
  bdbuf_cache.sync_active    = true;
  bdbuf_cache.sync_requester = rtems_task_self ();
  bdbuf_cache.sync_device    = dd;

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_unlock_cache ();
  /* Block until the swapout task sends the transient event */
  rtems_bdbuf_wait_for_transient_event ();
  rtems_bdbuf_unlock_sync ();

  return RTEMS_SUCCESSFUL;
}
2501
2502/**
2503 * Swapout transfer to the driver. The driver will break this I/O into groups
2504 * of consecutive write requests is multiple consecutive buffers are required
2505 * by the driver. The cache is not locked.
2506 *
2507 * @param transfer The transfer transaction.
2508 */
2509static void
2510rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2511{
2512  rtems_chain_node *node;
2513
2514  if (rtems_bdbuf_tracer)
2515    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dd->dev);
2516
2517  /*
2518   * If there are buffers to transfer to the media transfer them.
2519   */
2520  if (!rtems_chain_is_empty (&transfer->bds))
2521  {
2522    /*
2523     * The last block number used when the driver only supports
2524     * continuous blocks in a single request.
2525     */
2526    uint32_t last_block = 0;
2527
2528    rtems_disk_device *dd = transfer->dd;
2529    uint32_t media_blocks_per_block = dd->media_blocks_per_block;
2530    bool need_continuous_blocks =
2531      (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) != 0;
2532
2533    /*
2534     * Take as many buffers as configured and pass to the driver. Note, the
2535     * API to the drivers has an array of buffers and if a chain was passed
2536     * we could have just passed the list. If the driver API is updated it
2537     * should be possible to make this change with little effect in this
2538     * code. The array that is passed is broken in design and should be
2539     * removed. Merging members of a struct into the first member is
2540     * trouble waiting to happen.
2541     */
2542    transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
2543    transfer->write_req.bufnum = 0;
2544
2545    while ((node = rtems_chain_get_unprotected(&transfer->bds)) != NULL)
2546    {
2547      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2548      bool                write = false;
2549
2550      /*
2551       * If the device only accepts sequential buffers and this is not the
2552       * first buffer (the first is always sequential, and the buffer is not
2553       * sequential then put the buffer back on the transfer chain and write
2554       * the committed buffers.
2555       */
2556
2557      if (rtems_bdbuf_tracer)
2558        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2559                bd->block, transfer->write_req.bufnum,
2560                need_continuous_blocks ? "MULTI" : "SCAT");
2561
2562      if (need_continuous_blocks && transfer->write_req.bufnum &&
2563          bd->block != last_block + media_blocks_per_block)
2564      {
2565        rtems_chain_prepend_unprotected (&transfer->bds, &bd->link);
2566        write = true;
2567      }
2568      else
2569      {
2570        rtems_blkdev_sg_buffer* buf;
2571        buf = &transfer->write_req.bufs[transfer->write_req.bufnum];
2572        transfer->write_req.bufnum++;
2573        buf->user   = bd;
2574        buf->block  = bd->block;
2575        buf->length = dd->block_size;
2576        buf->buffer = bd->buffer;
2577        last_block  = bd->block;
2578      }
2579
2580      /*
2581       * Perform the transfer if there are no more buffers, or the transfer
2582       * size has reached the configured max. value.
2583       */
2584
2585      if (rtems_chain_is_empty (&transfer->bds) ||
2586          (transfer->write_req.bufnum >= bdbuf_config.max_write_blocks))
2587        write = true;
2588
2589      if (write)
2590      {
2591        rtems_bdbuf_execute_transfer_request (dd, &transfer->write_req, false);
2592
2593        transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
2594        transfer->write_req.bufnum = 0;
2595      }
2596    }
2597
2598    /*
2599     * If sync'ing and the deivce is capability of handling a sync IO control
2600     * call perform the call.
2601     */
2602    if (transfer->syncing &&
2603        (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
2604    {
2605      /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
2606      /* How should the error be handled ? */
2607    }
2608  }
2609}
2610
2611/**
2612 * Process the modified list of buffers. There is a sync or modified list that
2613 * needs to be handled so we have a common function to do the work.
2614 *
2615 * @param dd_ptr Pointer to the device to handle. If BDBUF_INVALID_DEV no
2616 * device is selected so select the device of the first buffer to be written to
2617 * disk.
2618 * @param chain The modified chain to process.
2619 * @param transfer The chain to append buffers to be written too.
2620 * @param sync_active If true this is a sync operation so expire all timers.
2621 * @param update_timers If true update the timers.
2622 * @param timer_delta It update_timers is true update the timers by this
2623 *                    amount.
2624 */
2625static void
2626rtems_bdbuf_swapout_modified_processing (rtems_disk_device  **dd_ptr,
2627                                         rtems_chain_control* chain,
2628                                         rtems_chain_control* transfer,
2629                                         bool                 sync_active,
2630                                         bool                 update_timers,
2631                                         uint32_t             timer_delta)
2632{
2633  if (!rtems_chain_is_empty (chain))
2634  {
2635    rtems_chain_node* node = rtems_chain_head (chain);
2636    bool              sync_all;
2637
2638    node = node->next;
2639
2640    /*
2641     * A sync active with no valid dev means sync all.
2642     */
2643    if (sync_active && (*dd_ptr == BDBUF_INVALID_DEV))
2644      sync_all = true;
2645    else
2646      sync_all = false;
2647
2648    while (!rtems_chain_is_tail (chain, node))
2649    {
2650      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2651
2652      /*
2653       * Check if the buffer's hold timer has reached 0. If a sync is active
2654       * or someone waits for a buffer written force all the timers to 0.
2655       *
2656       * @note Lots of sync requests will skew this timer. It should be based
2657       *       on TOD to be accurate. Does it matter ?
2658       */
2659      if (sync_all || (sync_active && (*dd_ptr == bd->dd))
2660          || rtems_bdbuf_has_buffer_waiters ())
2661        bd->hold_timer = 0;
2662
2663      if (bd->hold_timer)
2664      {
2665        if (update_timers)
2666        {
2667          if (bd->hold_timer > timer_delta)
2668            bd->hold_timer -= timer_delta;
2669          else
2670            bd->hold_timer = 0;
2671        }
2672
2673        if (bd->hold_timer)
2674        {
2675          node = node->next;
2676          continue;
2677        }
2678      }
2679
2680      /*
2681       * This assumes we can set it to BDBUF_INVALID_DEV which is just an
2682       * assumption. Cannot use the transfer list being empty the sync dev
2683       * calls sets the dev to use.
2684       */
2685      if (*dd_ptr == BDBUF_INVALID_DEV)
2686        *dd_ptr = bd->dd;
2687
2688      if (bd->dd == *dd_ptr)
2689      {
2690        rtems_chain_node* next_node = node->next;
2691        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2692
2693        /*
2694         * The blocks on the transfer list are sorted in block order. This
2695         * means multi-block transfers for drivers that require consecutive
2696         * blocks perform better with sorted blocks and for real disks it may
2697         * help lower head movement.
2698         */
2699
2700        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2701
2702        rtems_chain_extract_unprotected (node);
2703
2704        tnode = tnode->previous;
2705
2706        while (node && !rtems_chain_is_head (transfer, tnode))
2707        {
2708          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2709
2710          if (bd->block > tbd->block)
2711          {
2712            rtems_chain_insert_unprotected (tnode, node);
2713            node = NULL;
2714          }
2715          else
2716            tnode = tnode->previous;
2717        }
2718
2719        if (node)
2720          rtems_chain_prepend_unprotected (transfer, node);
2721
2722        node = next_node;
2723      }
2724      else
2725      {
2726        node = node->next;
2727      }
2728    }
2729  }
2730}
2731
2732/**
2733 * Process the cache's modified buffers. Check the sync list first then the
2734 * modified list extracting the buffers suitable to be written to disk. We have
2735 * a device at a time. The task level loop will repeat this operation while
2736 * there are buffers to be written. If the transfer fails place the buffers
2737 * back on the modified list and try again later. The cache is unlocked while
2738 * the buffers are being written to disk.
2739 *
2740 * @param timer_delta It update_timers is true update the timers by this
2741 *                    amount.
2742 * @param update_timers If true update the timers.
2743 * @param transfer The transfer transaction data.
2744 *
2745 * @retval true Buffers where written to disk so scan again.
2746 * @retval false No buffers where written to disk.
2747 */
2748static bool
2749rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2750                                bool                          update_timers,
2751                                rtems_bdbuf_swapout_transfer* transfer)
2752{
2753  rtems_bdbuf_swapout_worker* worker;
2754  bool                        transfered_buffers = false;
2755
2756  rtems_bdbuf_lock_cache ();
2757
2758  /*
2759   * If a sync is active do not use a worker because the current code does not
2760   * cleaning up after. We need to know the buffers have been written when
2761   * syncing to release sync lock and currently worker threads do not return to
2762   * here. We do not know the worker is the last in a sequence of sync writes
2763   * until after we have it running so we do not know to tell it to release the
2764   * lock. The simplest solution is to get the main swap out task perform all
2765   * sync operations.
2766   */
2767  if (bdbuf_cache.sync_active)
2768    worker = NULL;
2769  else
2770  {
2771    worker = (rtems_bdbuf_swapout_worker*)
2772      rtems_chain_get_unprotected (&bdbuf_cache.swapout_free_workers);
2773    if (worker)
2774      transfer = &worker->transfer;
2775  }
2776
2777  rtems_chain_initialize_empty (&transfer->bds);
2778  transfer->dd = BDBUF_INVALID_DEV;
2779  transfer->syncing = bdbuf_cache.sync_active;
2780
2781  /*
2782   * When the sync is for a device limit the sync to that device. If the sync
2783   * is for a buffer handle process the devices in the order on the sync
2784   * list. This means the dev is BDBUF_INVALID_DEV.
2785   */
2786  if (bdbuf_cache.sync_active)
2787    transfer->dd = bdbuf_cache.sync_device;
2788
2789  /*
2790   * If we have any buffers in the sync queue move them to the modified
2791   * list. The first sync buffer will select the device we use.
2792   */
2793  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2794                                           &bdbuf_cache.sync,
2795                                           &transfer->bds,
2796                                           true, false,
2797                                           timer_delta);
2798
2799  /*
2800   * Process the cache's modified list.
2801   */
2802  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2803                                           &bdbuf_cache.modified,
2804                                           &transfer->bds,
2805                                           bdbuf_cache.sync_active,
2806                                           update_timers,
2807                                           timer_delta);
2808
2809  /*
2810   * We have all the buffers that have been modified for this device so the
2811   * cache can be unlocked because the state of each buffer has been set to
2812   * TRANSFER.
2813   */
2814  rtems_bdbuf_unlock_cache ();
2815
2816  /*
2817   * If there are buffers to transfer to the media transfer them.
2818   */
2819  if (!rtems_chain_is_empty (&transfer->bds))
2820  {
2821    if (worker)
2822    {
2823      rtems_status_code sc = rtems_event_send (worker->id,
2824                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2825      if (sc != RTEMS_SUCCESSFUL)
2826        rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_2);
2827    }
2828    else
2829    {
2830      rtems_bdbuf_swapout_write (transfer);
2831    }
2832
2833    transfered_buffers = true;
2834  }
2835
2836  if (bdbuf_cache.sync_active && !transfered_buffers)
2837  {
2838    rtems_id sync_requester;
2839    rtems_bdbuf_lock_cache ();
2840    sync_requester = bdbuf_cache.sync_requester;
2841    bdbuf_cache.sync_active = false;
2842    bdbuf_cache.sync_requester = 0;
2843    rtems_bdbuf_unlock_cache ();
2844    if (sync_requester)
2845      rtems_event_transient_send (sync_requester);
2846  }
2847
2848  return transfered_buffers;
2849}
2850
2851/**
2852 * The swapout worker thread body.
2853 *
2854 * @param arg A pointer to the worker thread's private data.
2855 * @return rtems_task Not used.
2856 */
2857static rtems_task
2858rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2859{
2860  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2861
2862  while (worker->enabled)
2863  {
2864    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2865
2866    rtems_bdbuf_swapout_write (&worker->transfer);
2867
2868    rtems_bdbuf_lock_cache ();
2869
2870    rtems_chain_initialize_empty (&worker->transfer.bds);
2871    worker->transfer.dd = BDBUF_INVALID_DEV;
2872
2873    rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);
2874
2875    rtems_bdbuf_unlock_cache ();
2876  }
2877
2878  free (worker);
2879
2880  rtems_task_delete (RTEMS_SELF);
2881}
2882
2883/**
2884 * Close the swapout worker threads.
2885 */
2886static void
2887rtems_bdbuf_swapout_workers_close (void)
2888{
2889  rtems_chain_node* node;
2890
2891  rtems_bdbuf_lock_cache ();
2892
2893  node = rtems_chain_first (&bdbuf_cache.swapout_free_workers);
2894  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_free_workers, node))
2895  {
2896    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2897    worker->enabled = false;
2898    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2899    node = rtems_chain_next (node);
2900  }
2901
2902  rtems_bdbuf_unlock_cache ();
2903}
2904
2905/**
2906 * Body of task which takes care on flushing modified buffers to the disk.
2907 *
2908 * @param arg A pointer to the global cache data. Use the global variable and
2909 *            not this.
2910 * @return rtems_task Not used.
2911 */
2912static rtems_task
2913rtems_bdbuf_swapout_task (rtems_task_argument arg)
2914{
2915  rtems_bdbuf_swapout_transfer* transfer = (rtems_bdbuf_swapout_transfer *) arg;
2916  uint32_t                      period_in_ticks;
2917  const uint32_t                period_in_msecs = bdbuf_config.swapout_period;
2918  uint32_t                      timer_delta;
2919
2920  /*
2921   * Localise the period.
2922   */
2923  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2924
2925  /*
2926   * This is temporary. Needs to be changed to use the real time clock.
2927   */
2928  timer_delta = period_in_msecs;
2929
2930  while (bdbuf_cache.swapout_enabled)
2931  {
2932    rtems_event_set   out;
2933    rtems_status_code sc;
2934
2935    /*
2936     * Only update the timers once in the processing cycle.
2937     */
2938    bool update_timers = true;
2939
2940    /*
2941     * If we write buffers to any disk perform a check again. We only write a
2942     * single device at a time and the cache may have more than one device's
2943     * buffers modified waiting to be written.
2944     */
2945    bool transfered_buffers;
2946
2947    do
2948    {
2949      transfered_buffers = false;
2950
2951      /*
2952       * Extact all the buffers we find for a specific device. The device is
2953       * the first one we find on a modified list. Process the sync queue of
2954       * buffers first.
2955       */
2956      if (rtems_bdbuf_swapout_processing (timer_delta,
2957                                          update_timers,
2958                                          transfer))
2959      {
2960        transfered_buffers = true;
2961      }
2962
2963      /*
2964       * Only update the timers once.
2965       */
2966      update_timers = false;
2967    }
2968    while (transfered_buffers);
2969
2970    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2971                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2972                              period_in_ticks,
2973                              &out);
2974
2975    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2976      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SWAPOUT_RE);
2977  }
2978
2979  rtems_bdbuf_swapout_workers_close ();
2980
2981  free (transfer);
2982
2983  rtems_task_delete (RTEMS_SELF);
2984}
2985
2986static void
2987rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2988{
2989  bool wake_buffer_waiters = false;
2990  rtems_chain_node *node = NULL;
2991
2992  while ((node = rtems_chain_get_unprotected (purge_list)) != NULL)
2993  {
2994    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
2995
2996    if (bd->waiters == 0)
2997      wake_buffer_waiters = true;
2998
2999    rtems_bdbuf_discard_buffer (bd);
3000  }
3001
3002  if (wake_buffer_waiters)
3003    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
3004}
3005
/**
 * Walk the entire buffer AVL tree and collect the buffers belonging to the
 * disk device @a dd for purging. The traversal is iterative and uses an
 * explicit stack of parent nodes bounded by RTEMS_BDBUF_AVL_MAX_HEIGHT.
 * Idle buffers (cached, modified, sync) are moved to the purge list;
 * buffers currently in transfer or held by an accessor are switched to a
 * purged state so their owner discards them later; free, empty and already
 * purged buffers are left alone.
 */
static void
rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
                              const rtems_disk_device *dd)
{
  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer **prev = stack;
  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;

  /* Stack sentinel marking the bottom */
  *prev = NULL;

  while (cur != NULL)
  {
    if (cur->dd == dd)
    {
      switch (cur->state)
      {
        case RTEMS_BDBUF_STATE_FREE:
        case RTEMS_BDBUF_STATE_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
          break;
        case RTEMS_BDBUF_STATE_SYNC:
          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
          /* Fall through */
        case RTEMS_BDBUF_STATE_MODIFIED:
          rtems_bdbuf_group_release (cur);
          /* Fall through */
        case RTEMS_BDBUF_STATE_CACHED:
          rtems_chain_extract_unprotected (&cur->link);
          rtems_chain_append_unprotected (purge_list, &cur->link);
          break;
        case RTEMS_BDBUF_STATE_TRANSFER:
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
          break;
        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
          break;
        default:
          rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_STATE_11);
      }
    }

    if (cur->avl.left != NULL)
    {
      /* Left */
      ++prev;
      *prev = cur;
      cur = cur->avl.left;
    }
    else if (cur->avl.right != NULL)
    {
      /* Right */
      ++prev;
      *prev = cur;
      cur = cur->avl.right;
    }
    else
    {
      /* Leaf reached: climb back up until an unvisited right subtree is
       * found or the stack sentinel is hit. */
      while (*prev != NULL
             && (cur == (*prev)->avl.right || (*prev)->avl.right == NULL))
      {
        /* Up */
        cur = *prev;
        --prev;
      }
      if (*prev != NULL)
        /* Right */
        cur = (*prev)->avl.right;
      else
        /* Finished */
        cur = NULL;
    }
  }
}
3082
/**
 * Purge all buffers of a disk device with the cache already locked: reset
 * the read-ahead state, gather the device's buffers from the AVL tree onto
 * a local list and discard them.
 */
static void
rtems_bdbuf_do_purge_dev (rtems_disk_device *dd)
{
  rtems_chain_control purge_list;

  rtems_chain_initialize_empty (&purge_list);
  rtems_bdbuf_read_ahead_reset (dd);
  rtems_bdbuf_gather_for_purge (&purge_list, dd);
  rtems_bdbuf_purge_list (&purge_list);
}
3093
/**
 * Purge all buffers associated with the disk device. The cache is locked
 * for the duration of the purge.
 */
void
rtems_bdbuf_purge_dev (rtems_disk_device *dd)
{
  rtems_bdbuf_lock_cache ();
  rtems_bdbuf_do_purge_dev (dd);
  rtems_bdbuf_unlock_cache ();
}
3101
3102rtems_status_code
3103rtems_bdbuf_set_block_size (rtems_disk_device *dd,
3104                            uint32_t           block_size,
3105                            bool               sync)
3106{
3107  rtems_status_code sc = RTEMS_SUCCESSFUL;
3108
3109  /*
3110   * We do not care about the synchronization status since we will purge the
3111   * device later.
3112   */
3113  if (sync)
3114    rtems_bdbuf_syncdev (dd);
3115
3116  rtems_bdbuf_lock_cache ();
3117
3118  if (block_size > 0)
3119  {
3120    size_t bds_per_group = rtems_bdbuf_bds_per_group (block_size);
3121
3122    if (bds_per_group != 0)
3123    {
3124      int block_to_media_block_shift = 0;
3125      uint32_t media_blocks_per_block = block_size / dd->media_block_size;
3126      uint32_t one = 1;
3127
3128      while ((one << block_to_media_block_shift) < media_blocks_per_block)
3129      {
3130        ++block_to_media_block_shift;
3131      }
3132
3133      if ((dd->media_block_size << block_to_media_block_shift) != block_size)
3134        block_to_media_block_shift = -1;
3135
3136      dd->block_size = block_size;
3137      dd->block_count = dd->size / media_blocks_per_block;
3138      dd->media_blocks_per_block = media_blocks_per_block;
3139      dd->block_to_media_block_shift = block_to_media_block_shift;
3140      dd->bds_per_group = bds_per_group;
3141
3142      rtems_bdbuf_do_purge_dev (dd);
3143    }
3144    else
3145    {
3146      sc = RTEMS_INVALID_NUMBER;
3147    }
3148  }
3149  else
3150  {
3151    sc = RTEMS_INVALID_NUMBER;
3152  }
3153
3154  rtems_bdbuf_unlock_cache ();
3155
3156  return sc;
3157}
3158
/*
 * Body of the read-ahead worker task.  It sleeps until woken via the
 * RTEMS_BDBUF_READ_AHEAD_WAKE_UP event, then drains the cache's read-ahead
 * chain: for each queued device it starts one speculative multi-block read.
 * The loop exits, and the task deletes itself, once read_ahead_enabled is
 * cleared (checked only between wake-ups, so shutdown takes effect after the
 * current batch).
 */
static rtems_task
rtems_bdbuf_read_ahead_task (rtems_task_argument arg)
{
  rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;

  while (bdbuf_cache.read_ahead_enabled)
  {
    rtems_chain_node *node;

    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
    rtems_bdbuf_lock_cache ();

    /* Process every device queued for read-ahead under the cache lock. */
    while ((node = rtems_chain_get_unprotected (chain)) != NULL)
    {
      rtems_disk_device *dd =
        RTEMS_CONTAINER_OF (node, rtems_disk_device, read_ahead.node);
      rtems_blkdev_bnum block = dd->read_ahead.next;
      rtems_blkdev_bnum media_block = 0;
      rtems_status_code sc =
        rtems_bdbuf_get_media_block (dd, block, &media_block);

      /* Mark the node off-chain so the device can be re-queued later. */
      rtems_chain_set_off_chain (&dd->read_ahead.node);

      if (sc == RTEMS_SUCCESSFUL)
      {
        rtems_bdbuf_buffer *bd =
          rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

        /* A NULL bd means the block is already cached or in transfer;
           nothing to do in that case. */
        if (bd != NULL)
        {
          uint32_t transfer_count = dd->block_count - block;
          uint32_t max_transfer_count = bdbuf_config.max_read_ahead_blocks;

          if (transfer_count >= max_transfer_count)
          {
            /* Full-sized read ahead: cap the transfer and arm the next
               trigger half-way through it so reads pipeline smoothly. */
            transfer_count = max_transfer_count;
            dd->read_ahead.trigger = block + transfer_count / 2;
            dd->read_ahead.next = block + transfer_count;
          }
          else
          {
            /* Near the end of the disk: read the remainder and disable
               further read ahead for this device. */
            dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
          }

          ++dd->stats.read_ahead_transfers;
          rtems_bdbuf_execute_read_request (dd, bd, transfer_count);
        }
      }
      else
      {
        /* The next block is out of range; stop read ahead on this device. */
        dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
      }
    }

    rtems_bdbuf_unlock_cache ();
  }

  rtems_task_delete (RTEMS_SELF);
}
3218
3219void rtems_bdbuf_get_device_stats (const rtems_disk_device *dd,
3220                                   rtems_blkdev_stats      *stats)
3221{
3222  rtems_bdbuf_lock_cache ();
3223  *stats = dd->stats;
3224  rtems_bdbuf_unlock_cache ();
3225}
3226
3227void rtems_bdbuf_reset_device_stats (rtems_disk_device *dd)
3228{
3229  rtems_bdbuf_lock_cache ();
3230  memset (&dd->stats, 0, sizeof(dd->stats));
3231  rtems_bdbuf_unlock_cache ();
3232}
Note: See TracBrowser for help on using the repository browser.