source: rtems/cpukit/libblock/src/bdbuf.c @ 40dcafa

4.115
Last change on this file since 40dcafa was 40dcafa, checked in by Sebastian Huber <sebastian.huber@…>, on 08/02/14 at 14:22:31

Add and use RTEMS_CONTAINER_OF()

  • Property mode set to 100644
File size: 85.8 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (c) 2009-2012 embedded brains GmbH.
23 */
24
25/**
26 * Set to 1 to enable debug tracing.
27 */
28#define RTEMS_BDBUF_TRACE 0
29
30#if HAVE_CONFIG_H
31#include "config.h"
32#endif
33#include <limits.h>
34#include <errno.h>
35#include <stdio.h>
36#include <string.h>
37#include <inttypes.h>
38#include <pthread.h>
39
40#include <rtems.h>
41#include <rtems/error.h>
42#include <rtems/malloc.h>
43
44#include "rtems/bdbuf.h"
45
46#define BDBUF_INVALID_DEV NULL
47
48/*
49 * Simpler label for this file.
50 */
51#define bdbuf_config rtems_bdbuf_configuration
52
/**
 * A swapout transfer transaction data. This data is passed to a worker thread
 * to handle the write phase of the transfer.
 */
typedef struct rtems_bdbuf_swapout_transfer
{
  rtems_chain_control   bds;         /**< The transfer list of BDs. */
  rtems_disk_device    *dd;          /**< The device the transfer is for. */
  bool                  syncing;     /**< True if this transfer is part of a
                                      * sync operation. */
  rtems_blkdev_request  write_req;   /**< The write request. */
} rtems_bdbuf_swapout_transfer;
64
/**
 * Swapout worker thread. These are available to take processing from the
 * main swapout thread and handle the I/O operation.
 */
typedef struct rtems_bdbuf_swapout_worker
{
  rtems_chain_node             link;     /**< The threads sit on a chain when
                                          * idle. */
  rtems_id                     id;       /**< The id of the task so we can wake
                                          * it. */
  bool                         enabled;  /**< The worker is enabled.
                                          * NOTE(review): presumably cleared to
                                          * ask the worker to exit — confirm in
                                          * the swapout task code. */
  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
                                          * thread. */
} rtems_bdbuf_swapout_worker;
79
80#if defined(RTEMS_BDBUF_USE_PTHREAD)
81typedef pthread_mutex_t rtems_bdbuf_lock_type;
82#else
83typedef rtems_id rtems_bdbuf_lock_type;
84#endif
85
/**
 * Buffer waiters synchronization.  The counter lets wakers skip the wake
 * call when nobody is waiting (see rtems_bdbuf_wake()).
 */
typedef struct rtems_bdbuf_waiters {
  unsigned       count;     /**< Number of tasks currently waiting. */
#if defined(RTEMS_BDBUF_USE_PTHREAD)
  pthread_cond_t cond_var;  /**< Condition variable the waiters block on. */
#else
  rtems_id       sema;      /**< Semaphore the waiters block on. */
#endif
} rtems_bdbuf_waiters;
97
/**
 * The BD buffer cache.  A single instance (bdbuf_cache) holds all cache
 * state; it is protected by the cache lock below.
 */
typedef struct rtems_bdbuf_cache
{
  rtems_id            swapout;           /**< Swapout task ID */
  bool                swapout_enabled;   /**< Swapout is only running if
                                          * enabled. Set to false to kill the
                                          * swap out task. It deletes itself. */
  rtems_chain_control swapout_free_workers; /**< The work threads for the swapout
                                             * task. */

  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
                                          * descriptors. */
  void*               buffers;           /**< The buffer's memory. */
  size_t              buffer_min_count;  /**< Number of minimum size buffers
                                          * that fit the buffer memory. */
  size_t              max_bds_per_group; /**< The number of BDs of minimum
                                          * buffer size that fit in a group. */
  uint32_t            flags;             /**< Configuration flags. */

  rtems_bdbuf_lock_type lock;            /**< The cache lock. It locks all
                                          * cache data, BD and lists. */
  rtems_bdbuf_lock_type sync_lock;       /**< Sync calls block writes. */
  bool                sync_active;       /**< True if a sync is active. */
  rtems_id            sync_requester;    /**< The sync requester. */
  rtems_disk_device  *sync_device;       /**< The device to sync and
                                          * BDBUF_INVALID_DEV not a device
                                          * sync. */

  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
                                          * root. There is only one. */
  rtems_chain_control lru;               /**< Least recently used list */
  rtems_chain_control modified;          /**< Modified buffers list */
  rtems_chain_control sync;              /**< Buffers to sync list */

  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
                                          * ACCESS_CACHED, ACCESS_MODIFIED or
                                          * ACCESS_EMPTY
                                          * state. */
  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
                                          * state. */
  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
                                          * available. */

  rtems_bdbuf_swapout_transfer *swapout_transfer; /**< Transfer data for the
                                                   * swapout task — presumably
                                                   * allocated at init; confirm
                                                   * in the init code. */
  rtems_bdbuf_swapout_worker *swapout_workers;    /**< Worker control blocks —
                                                   * presumably allocated at
                                                   * init; confirm. */

  size_t              group_count;       /**< The number of groups. */
  rtems_bdbuf_group*  groups;            /**< The groups. */
  rtems_id            read_ahead_task;   /**< Read-ahead task */
  rtems_chain_control read_ahead_chain;  /**< Read-ahead request chain */
  bool                read_ahead_enabled; /**< Read-ahead enabled */
  rtems_status_code   init_status;       /**< The initialization status */
} rtems_bdbuf_cache;
153
/**
 * Fatal error codes of the bdbuf subsystem.  They are reported via
 * rtems_fatal() with source RTEMS_FATAL_SOURCE_BDBUF (see
 * rtems_bdbuf_fatal()); some are combined with the buffer state in the
 * upper 16 bits (see rtems_bdbuf_fatal_with_state()).
 */
typedef enum {
  RTEMS_BDBUF_FATAL_CACHE_LOCK,
  RTEMS_BDBUF_FATAL_CACHE_UNLOCK,
  RTEMS_BDBUF_FATAL_CACHE_WAIT_2,
  RTEMS_BDBUF_FATAL_CACHE_WAIT_TO,
  RTEMS_BDBUF_FATAL_CACHE_WAKE,
  RTEMS_BDBUF_FATAL_PREEMPT_DIS,
  RTEMS_BDBUF_FATAL_PREEMPT_RST,
  RTEMS_BDBUF_FATAL_RA_WAKE_UP,
  RTEMS_BDBUF_FATAL_RECYCLE,
  RTEMS_BDBUF_FATAL_SO_WAKE_1,
  RTEMS_BDBUF_FATAL_SO_WAKE_2,
  RTEMS_BDBUF_FATAL_STATE_0,
  RTEMS_BDBUF_FATAL_STATE_2,
  RTEMS_BDBUF_FATAL_STATE_4,
  RTEMS_BDBUF_FATAL_STATE_5,
  RTEMS_BDBUF_FATAL_STATE_6,
  RTEMS_BDBUF_FATAL_STATE_7,
  RTEMS_BDBUF_FATAL_STATE_8,
  RTEMS_BDBUF_FATAL_STATE_9,
  RTEMS_BDBUF_FATAL_STATE_10,
  RTEMS_BDBUF_FATAL_STATE_11,
  RTEMS_BDBUF_FATAL_SWAPOUT_RE,
  RTEMS_BDBUF_FATAL_SYNC_LOCK,
  RTEMS_BDBUF_FATAL_SYNC_UNLOCK,
  RTEMS_BDBUF_FATAL_TREE_RM,
  RTEMS_BDBUF_FATAL_WAIT_EVNT,
  RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT,
  RTEMS_BDBUF_FATAL_ONCE,
  RTEMS_BDBUF_FATAL_MTX_ATTR_INIT,
  RTEMS_BDBUF_FATAL_MTX_ATTR_SETPROTO,
  RTEMS_BDBUF_FATAL_CV_WAIT,
  RTEMS_BDBUF_FATAL_CV_BROADCAST
} rtems_bdbuf_fatal_code;
188
189/**
190 * The events used in this code. These should be system events rather than
191 * application events.
192 */
193#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
194#define RTEMS_BDBUF_READ_AHEAD_WAKE_UP RTEMS_EVENT_1
195
196/**
197 * Lock semaphore attributes. This is used for locking type mutexes.
198 *
199 * @warning Priority inheritance is on.
200 */
201#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
202  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
203   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
204
205/**
206 * Waiter semaphore attributes.
207 *
208 * @warning Do not configure as inherit priority. If a driver is in the driver
209 *          initialisation table this locked semaphore will have the IDLE task
210 *          as the holder and a blocking task will raise the priority of the
 *          IDLE task which can cause unusual side effects.
212 */
213#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
214  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
215   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
216
217/**
218 * Waiter timeout. Set to non-zero to find some info on a waiter that is
219 * waiting too long.
220 */
221#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
222#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
223#define RTEMS_BDBUF_WAIT_TIMEOUT \
224  (RTEMS_MICROSECONDS_TO_TICKS (20000000))
225#endif
226
227static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);
228
229static rtems_task rtems_bdbuf_read_ahead_task(rtems_task_argument arg);
230
231/**
232 * The Buffer Descriptor cache.
233 */
234static rtems_bdbuf_cache bdbuf_cache;
235
236static pthread_once_t rtems_bdbuf_once_state = PTHREAD_ONCE_INIT;
237
238#if RTEMS_BDBUF_TRACE
239/**
240 * If true output the trace message.
241 */
242bool rtems_bdbuf_tracer;
243
244/**
245 * Return the number of items on the list.
246 *
247 * @param list The chain control.
248 * @return uint32_t The number of items on the list.
249 */
250uint32_t
251rtems_bdbuf_list_count (rtems_chain_control* list)
252{
253  rtems_chain_node* node = rtems_chain_first (list);
254  uint32_t          count = 0;
255  while (!rtems_chain_is_tail (list, node))
256  {
257    count++;
258    node = rtems_chain_next (node);
259  }
260  return count;
261}
262
263/**
264 * Show the usage for the bdbuf cache.
265 */
266void
267rtems_bdbuf_show_usage (void)
268{
269  uint32_t group;
270  uint32_t total = 0;
271  uint32_t val;
272
273  for (group = 0; group < bdbuf_cache.group_count; group++)
274    total += bdbuf_cache.groups[group].users;
275  printf ("bdbuf:group users=%lu", total);
276  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
277  printf (", lru=%lu", val);
278  total = val;
279  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
280  printf (", mod=%lu", val);
281  total += val;
282  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
283  printf (", sync=%lu", val);
284  total += val;
285  printf (", total=%lu\n", total);
286}
287
288/**
289 * Show the users for a group of a bd.
290 *
291 * @param where A label to show the context of output.
292 * @param bd The bd to show the users of.
293 */
294void
295rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
296{
297  const char* states[] =
298    { "FR", "EM", "CH", "AC", "AM", "AE", "AP", "MD", "SY", "TR", "TP" };
299
300  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
301          where,
302          bd->block, states[bd->state],
303          bd->group - bdbuf_cache.groups,
304          bd - bdbuf_cache.bds,
305          bd->group->users,
306          bd->group->users > 8 ? "<<<<<<<" : "");
307}
308#else
309#define rtems_bdbuf_tracer (0)
310#define rtems_bdbuf_show_usage() ((void) 0)
311#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
312#endif
313
314/**
315 * The default maximum height of 32 allows for AVL trees having between
316 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
317 * change this compile-time constant as you wish.
318 */
319#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
320#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
321#endif
322
/**
 * Report an unrecoverable bdbuf error via the fatal error handler, using the
 * RTEMS_FATAL_SOURCE_BDBUF source.  Does not return.
 */
static void
rtems_bdbuf_fatal (rtems_fatal_code error)
{
  rtems_fatal (RTEMS_FATAL_SOURCE_BDBUF, error);
}
328
329static void
330rtems_bdbuf_fatal_with_state (rtems_bdbuf_buf_state state,
331                              rtems_bdbuf_fatal_code error)
332{
333  rtems_bdbuf_fatal ((((uint32_t) state) << 16) | error);
334}
335
/**
 * Create a cache lock.  With the pthread back-end this is a priority
 * inheritance mutex (the name is unused); otherwise a Classic API binary
 * semaphore with the cache lock attributes.
 *
 * @param name Semaphore name (Classic API variant only).
 * @param lock The lock to initialize.
 * @retval RTEMS_SUCCESSFUL The lock was created.
 * @retval RTEMS_UNSATISFIED pthread_mutex_init failed (pthread variant);
 *         otherwise the rtems_semaphore_create status is returned.
 */
static rtems_status_code
rtems_bdbuf_lock_create (rtems_name name, rtems_bdbuf_lock_type *lock)
{
#if defined(RTEMS_BDBUF_USE_PTHREAD)
  int                 eno;
  pthread_mutexattr_t attr;

  (void) name;

  eno = pthread_mutexattr_init (&attr);
  if (eno != 0)
    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_MTX_ATTR_INIT);

  eno = pthread_mutexattr_setprotocol (&attr, PTHREAD_PRIO_INHERIT);
  if (eno != 0)
    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_MTX_ATTR_SETPROTO);

  eno = pthread_mutex_init (lock, &attr);

  /* The attribute object is no longer needed once the mutex exists. */
  pthread_mutexattr_destroy (&attr);

  if (eno != 0)
    return RTEMS_UNSATISFIED;

  return RTEMS_SUCCESSFUL;
#else
  return rtems_semaphore_create(
    name,
    1,
    RTEMS_BDBUF_CACHE_LOCK_ATTRIBS,
    0,
    lock
  );
#endif
}
371
/**
 * Delete a lock created by rtems_bdbuf_lock_create().
 */
static void
rtems_bdbuf_lock_delete (rtems_bdbuf_lock_type *lock)
{
#if defined(RTEMS_BDBUF_USE_PTHREAD)
  pthread_mutex_destroy (lock);
#else
  rtems_semaphore_delete (*lock);
#endif
}
381
/**
 * Create a waiter synchronization object: a condition variable with the
 * pthread back-end, otherwise a simple binary semaphore created empty so
 * waiters block until flushed.
 *
 * @param name Semaphore name (Classic API variant only).
 * @param waiter The waiter object to initialize.
 * @retval RTEMS_SUCCESSFUL The waiter object was created.
 * @retval RTEMS_UNSATISFIED pthread_cond_init failed (pthread variant);
 *         otherwise the rtems_semaphore_create status is returned.
 */
static rtems_status_code
rtems_bdbuf_waiter_create (rtems_name name, rtems_bdbuf_waiters *waiter)
{
#if defined(RTEMS_BDBUF_USE_PTHREAD)
  int eno = pthread_cond_init (&waiter->cond_var, NULL);
  if (eno != 0)
    return RTEMS_UNSATISFIED;

  return RTEMS_SUCCESSFUL;
#else
  return rtems_semaphore_create(
    name,
    0,
    RTEMS_BDBUF_CACHE_WAITER_ATTRIBS,
    0,
    &waiter->sema
  );
#endif
}
401
/**
 * Delete a waiter object created by rtems_bdbuf_waiter_create().
 */
static void
rtems_bdbuf_waiter_delete (rtems_bdbuf_waiters *waiter)
{
#if defined(RTEMS_BDBUF_USE_PTHREAD)
  pthread_cond_destroy (&waiter->cond_var);
#else
  rtems_semaphore_delete (waiter->sema);
#endif
}
411
412/**
413 * Searches for the node with specified dd/block.
414 *
415 * @param root pointer to the root node of the AVL-Tree
416 * @param dd disk device search key
417 * @param block block search key
418 * @retval NULL node with the specified dd/block is not found
419 * @return pointer to the node with specified dd/block
420 */
421static rtems_bdbuf_buffer *
422rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
423                        const rtems_disk_device *dd,
424                        rtems_blkdev_bnum    block)
425{
426  rtems_bdbuf_buffer* p = *root;
427
428  while ((p != NULL) && ((p->dd != dd) || (p->block != block)))
429  {
430    if (((uintptr_t) p->dd < (uintptr_t) dd)
431        || ((p->dd == dd) && (p->block < block)))
432    {
433      p = p->avl.right;
434    }
435    else
436    {
437      p = p->avl.left;
438    }
439  }
440
441  return p;
442}
443
444/**
445 * Inserts the specified node to the AVl-Tree.
446 *
447 * @param root pointer to the root node of the AVL-Tree
448 * @param node Pointer to the node to add.
449 * @retval 0 The node added successfully
450 * @retval -1 An error occured
451 */
452static int
453rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
454                       rtems_bdbuf_buffer*  node)
455{
456  const rtems_disk_device *dd = node->dd;
457  rtems_blkdev_bnum block = node->block;
458
459  rtems_bdbuf_buffer*  p = *root;
460  rtems_bdbuf_buffer*  q;
461  rtems_bdbuf_buffer*  p1;
462  rtems_bdbuf_buffer*  p2;
463  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
464  rtems_bdbuf_buffer** buf_prev = buf_stack;
465
466  bool modified = false;
467
468  if (p == NULL)
469  {
470    *root = node;
471    node->avl.left = NULL;
472    node->avl.right = NULL;
473    node->avl.bal = 0;
474    return 0;
475  }
476
477  while (p != NULL)
478  {
479    *buf_prev++ = p;
480
481    if (((uintptr_t) p->dd < (uintptr_t) dd)
482        || ((p->dd == dd) && (p->block < block)))
483    {
484      p->avl.cache = 1;
485      q = p->avl.right;
486      if (q == NULL)
487      {
488        q = node;
489        p->avl.right = q = node;
490        break;
491      }
492    }
493    else if ((p->dd != dd) || (p->block != block))
494    {
495      p->avl.cache = -1;
496      q = p->avl.left;
497      if (q == NULL)
498      {
499        q = node;
500        p->avl.left = q;
501        break;
502      }
503    }
504    else
505    {
506      return -1;
507    }
508
509    p = q;
510  }
511
512  q->avl.left = q->avl.right = NULL;
513  q->avl.bal = 0;
514  modified = true;
515  buf_prev--;
516
517  while (modified)
518  {
519    if (p->avl.cache == -1)
520    {
521      switch (p->avl.bal)
522      {
523        case 1:
524          p->avl.bal = 0;
525          modified = false;
526          break;
527
528        case 0:
529          p->avl.bal = -1;
530          break;
531
532        case -1:
533          p1 = p->avl.left;
534          if (p1->avl.bal == -1) /* simple LL-turn */
535          {
536            p->avl.left = p1->avl.right;
537            p1->avl.right = p;
538            p->avl.bal = 0;
539            p = p1;
540          }
541          else /* double LR-turn */
542          {
543            p2 = p1->avl.right;
544            p1->avl.right = p2->avl.left;
545            p2->avl.left = p1;
546            p->avl.left = p2->avl.right;
547            p2->avl.right = p;
548            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
549            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
550            p = p2;
551          }
552          p->avl.bal = 0;
553          modified = false;
554          break;
555
556        default:
557          break;
558      }
559    }
560    else
561    {
562      switch (p->avl.bal)
563      {
564        case -1:
565          p->avl.bal = 0;
566          modified = false;
567          break;
568
569        case 0:
570          p->avl.bal = 1;
571          break;
572
573        case 1:
574          p1 = p->avl.right;
575          if (p1->avl.bal == 1) /* simple RR-turn */
576          {
577            p->avl.right = p1->avl.left;
578            p1->avl.left = p;
579            p->avl.bal = 0;
580            p = p1;
581          }
582          else /* double RL-turn */
583          {
584            p2 = p1->avl.left;
585            p1->avl.left = p2->avl.right;
586            p2->avl.right = p1;
587            p->avl.right = p2->avl.left;
588            p2->avl.left = p;
589            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
590            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
591            p = p2;
592          }
593          p->avl.bal = 0;
594          modified = false;
595          break;
596
597        default:
598          break;
599      }
600    }
601    q = p;
602    if (buf_prev > buf_stack)
603    {
604      p = *--buf_prev;
605
606      if (p->avl.cache == -1)
607      {
608        p->avl.left = q;
609      }
610      else
611      {
612        p->avl.right = q;
613      }
614    }
615    else
616    {
617      *root = p;
618      break;
619    }
620  };
621
622  return 0;
623}
624
625
/**
 * Removes the node from the tree.
 *
 * The search path is recorded in buf_stack with per-node avl.cache hints
 * (1 = descended right, -1 = descended left) so the rebalance pass can walk
 * back up without parent pointers.
 *
 * @param root Pointer to pointer to the root node
 * @param node Pointer to the node to remove
 * @retval 0 Item removed
 * @retval -1 No such item found
 */
static int
rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
                       const rtems_bdbuf_buffer* node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  r;
  rtems_bdbuf_buffer*  s;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  memset (buf_stack, 0, sizeof(buf_stack));

  /* Search for the node by dd/block key, recording the path. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      p->avl.cache = 1;
      p = p->avl.right;
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      p->avl.cache = -1;
      p = p->avl.left;
    }
    else
    {
      /* node found */
      break;
    }
  }

  if (p == NULL)
  {
    /* there is no such node */
    return -1;
  }

  q = p;

  buf_prev--;
  if (buf_prev > buf_stack)
  {
    p = *(buf_prev - 1);
  }
  else
  {
    p = NULL;
  }

  /* at this moment q - is a node to delete, p is q's parent */
  if (q->avl.right == NULL)
  {
    /* No right subtree: the left child (possibly NULL) replaces q. */
    r = q->avl.left;
    if (r != NULL)
    {
      r->avl.bal = 0;
    }
    q = r;
  }
  else
  {
    rtems_bdbuf_buffer **t;

    r = q->avl.right;

    if (r->avl.left == NULL)
    {
      /* Right child has no left subtree: splice it directly into q's
       * place. */
      r->avl.left = q->avl.left;
      r->avl.bal = q->avl.bal;
      r->avl.cache = 1;
      *buf_prev++ = q = r;
    }
    else
    {
      /* Find the in-order successor s (leftmost node of the right
       * subtree) and move it into q's position; t remembers the stack
       * slot where the replacement must be recorded. */
      t = buf_prev++;
      s = r;

      while (s->avl.left != NULL)
      {
        *buf_prev++ = r = s;
        s = r->avl.left;
        r->avl.cache = -1;
      }

      s->avl.left = q->avl.left;
      r->avl.left = s->avl.right;
      s->avl.right = q->avl.right;
      s->avl.bal = q->avl.bal;
      s->avl.cache = 1;

      *t = q = s;
    }
  }

  /* Reattach the replacement to q's old parent (or make it the root). */
  if (p != NULL)
  {
    if (p->avl.cache == -1)
    {
      p->avl.left = q;
    }
    else
    {
      p->avl.right = q;
    }
  }
  else
  {
    *root = q;
  }

  modified = true;

  /* Walk back up the recorded path, rebalancing until the subtree height
   * no longer changes. */
  while (modified)
  {
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;
    }
    else
    {
      break;
    }

    if (p->avl.cache == -1)
    {
      /* rebalance left branch */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          break;
        case  0:
          p->avl.bal = 1;
          modified = false;
          break;

        case +1:
          p1 = p->avl.right;

          if (p1->avl.bal >= 0) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;

            if (p1->avl.bal == 0)
            {
              p1->avl.bal = -1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;

            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;

            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }
    else
    {
      /* rebalance right branch */
      switch (p->avl.bal)
      {
        case +1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = -1;
          modified = false;
          break;

        case -1:
          p1 = p->avl.left;

          if (p1->avl.bal <= 0) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            if (p1->avl.bal == 0)
            {
              p1->avl.bal = 1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;

            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;

            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }

    /* Write the possibly-new subtree root back into its parent. */
    if (buf_prev > buf_stack)
    {
      q = *(buf_prev - 1);

      if (q->avl.cache == -1)
      {
        q->avl.left = p;
      }
      else
      {
        q->avl.right = p;
      }
    }
    else
    {
      *root = p;
      break;
    }

  }

  return 0;
}
900
/**
 * Set the state of a buffer descriptor.
 */
static void
rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
{
  bd->state = state;
}
906
907static rtems_blkdev_bnum
908rtems_bdbuf_media_block (const rtems_disk_device *dd, rtems_blkdev_bnum block)
909{
910  if (dd->block_to_media_block_shift >= 0)
911    return block << dd->block_to_media_block_shift;
912  else
913    /*
914     * Change the block number for the block size to the block number for the media
915     * block size. We have to use 64bit maths. There is no short cut here.
916     */
917    return (rtems_blkdev_bnum)
918      ((((uint64_t) block) * dd->block_size) / dd->media_block_size);
919}
920
/**
 * Lock the mutex. A single task can nest calls.
 *
 * @param lock The mutex to lock.
 * @param fatal_error_code The error code if the call fails.
 */
static void
rtems_bdbuf_lock (rtems_bdbuf_lock_type *lock, uint32_t fatal_error_code)
{
#if defined(RTEMS_BDBUF_USE_PTHREAD)
  int eno = pthread_mutex_lock (lock);
  if (eno != 0)
    rtems_bdbuf_fatal (fatal_error_code);
#else
  /* Block forever; a failure here means the lock object is broken. */
  rtems_status_code sc = rtems_semaphore_obtain (*lock,
                                                 RTEMS_WAIT,
                                                 RTEMS_NO_TIMEOUT);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_bdbuf_fatal (fatal_error_code);
#endif
}
942
/**
 * Unlock the mutex.
 *
 * @param lock The mutex to unlock.
 * @param fatal_error_code The error code if the call fails.
 */
static void
rtems_bdbuf_unlock (rtems_bdbuf_lock_type *lock, uint32_t fatal_error_code)
{
#if defined(RTEMS_BDBUF_USE_PTHREAD)
  int eno = pthread_mutex_unlock (lock);
  if (eno != 0)
    rtems_bdbuf_fatal (fatal_error_code);
#else
  rtems_status_code sc = rtems_semaphore_release (*lock);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_bdbuf_fatal (fatal_error_code);
#endif
}
962
/**
 * Lock the cache lock, which protects all cache data, BDs and lists.  A
 * single task can nest calls.
 */
static void
rtems_bdbuf_lock_cache (void)
{
  rtems_bdbuf_lock (&bdbuf_cache.lock, RTEMS_BDBUF_FATAL_CACHE_LOCK);
}
971
/**
 * Unlock the cache lock.
 */
static void
rtems_bdbuf_unlock_cache (void)
{
  rtems_bdbuf_unlock (&bdbuf_cache.lock, RTEMS_BDBUF_FATAL_CACHE_UNLOCK);
}
980
/**
 * Lock the cache's sync lock.  Sync operations hold this to block writers.
 * A single task can nest calls.
 */
static void
rtems_bdbuf_lock_sync (void)
{
  rtems_bdbuf_lock (&bdbuf_cache.sync_lock, RTEMS_BDBUF_FATAL_SYNC_LOCK);
}
989
/**
 * Unlock the cache's sync lock, letting any blocked writers proceed.
 */
static void
rtems_bdbuf_unlock_sync (void)
{
  rtems_bdbuf_unlock (&bdbuf_cache.sync_lock,
                      RTEMS_BDBUF_FATAL_SYNC_UNLOCK);
}
999
/**
 * Increment the user count of the group the BD belongs to.
 */
static void
rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
{
  ++bd->group->users;
}
1005
/**
 * Decrement the user count of the group the BD belongs to.
 */
static void
rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
{
  --bd->group->users;
}
1011
#if !defined(RTEMS_BDBUF_USE_PTHREAD)
/**
 * Disable preemption for the calling task.
 *
 * @return The previous task mode; pass it to
 *         rtems_bdbuf_restore_preemption().
 */
static rtems_mode
rtems_bdbuf_disable_preemption (void)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  rtems_mode prev_mode = 0;

  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_PREEMPT_DIS);

  return prev_mode;
}

/**
 * Restore the task mode previously saved by
 * rtems_bdbuf_disable_preemption().
 *
 * @param prev_mode The task mode to restore.
 */
static void
rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;

  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
  if (sc != RTEMS_SUCCESSFUL)
    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_PREEMPT_RST);
}
#endif
1036
/**
 * Wait until woken. Semaphores are used so a number of tasks can wait and can
 * be woken at once. Task events would require we maintain a list of tasks to
 * be woken and this would require storage and we do not know the number of
 * tasks that could be waiting.
 *
 * While we have the cache locked we can try and claim the semaphore and
 * therefore know when we release the lock to the cache we will block until the
 * semaphore is released. This may even happen before we get to block.
 *
 * A counter is used to save the release call when no one is waiting.
 *
 * The function assumes the cache is locked on entry and it will be locked on
 * exit.
 */
static void
rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
{
  /*
   * Indicate we are waiting.
   */
  ++waiters->count;

#if defined(RTEMS_BDBUF_USE_PTHREAD)
  {
    /* pthread_cond_wait atomically releases and re-acquires the cache
     * lock, so no preemption workaround is needed in this variant. */
    int eno = pthread_cond_wait (&waiters->cond_var, &bdbuf_cache.lock);
    if (eno != 0)
      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CV_WAIT);
  }
#else
  {
    rtems_status_code sc;
    rtems_mode        prev_mode;

    /*
     * Disable preemption then unlock the cache and block.  There is no POSIX
     * condition variable in the core API so this is a work around.
     *
     * The issue is a task could preempt after the cache is unlocked because it is
     * blocking or just hits that window, and before this task has blocked on the
     * semaphore. If the preempting task flushes the queue this task will not see
     * the flush and may block for ever or until another transaction flushes this
     * semaphore.
     */
    prev_mode = rtems_bdbuf_disable_preemption();

    /*
     * Unlock the cache, wait, and lock the cache when we return.
     */
    rtems_bdbuf_unlock_cache ();

    sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);

    if (sc == RTEMS_TIMEOUT)
      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CACHE_WAIT_TO);

    /*
     * The waker flushes the semaphore (see rtems_bdbuf_wake()), so the
     * expected wake-up status is RTEMS_UNSATISFIED; anything else is fatal.
     */
    if (sc != RTEMS_UNSATISFIED)
      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CACHE_WAIT_2);

    rtems_bdbuf_lock_cache ();

    rtems_bdbuf_restore_preemption (prev_mode);
  }
#endif

  --waiters->count;
}
1104
/**
 * Wait on the waiter object for the buffer @a bd.  The group user count and
 * the BD waiter count are held for the duration of the wait.  Assumes the
 * cache is locked on entry and locked again on exit (see
 * rtems_bdbuf_anonymous_wait()).
 */
static void
rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
{
  rtems_bdbuf_group_obtain (bd);
  ++bd->waiters;
  rtems_bdbuf_anonymous_wait (waiters);
  --bd->waiters;
  rtems_bdbuf_group_release (bd);
}
1114
/**
 * Wake a blocked resource. The resource has a counter that lets us know if
 * there are any waiters; when it is zero the wake call is skipped entirely.
 */
static void
rtems_bdbuf_wake (rtems_bdbuf_waiters *waiters)
{
  if (waiters->count > 0)
  {
#if defined(RTEMS_BDBUF_USE_PTHREAD)
    int eno = pthread_cond_broadcast (&waiters->cond_var);
    if (eno != 0)
      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CV_BROADCAST);
#else
    /* Flushing releases every task blocked on the semaphore at once. */
    rtems_status_code sc = rtems_semaphore_flush (waiters->sema);
    if (sc != RTEMS_SUCCESSFUL)
      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CACHE_WAKE);
#endif
  }
}
1135
1136static void
1137rtems_bdbuf_wake_swapper (void)
1138{
1139  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1140                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1141  if (sc != RTEMS_SUCCESSFUL)
1142    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_1);
1143}
1144
1145static bool
1146rtems_bdbuf_has_buffer_waiters (void)
1147{
1148  return bdbuf_cache.buffer_waiters.count;
1149}
1150
1151static void
1152rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
1153{
1154  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1155    rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_TREE_RM);
1156}
1157
1158static void
1159rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
1160{
1161  switch (bd->state)
1162  {
1163    case RTEMS_BDBUF_STATE_FREE:
1164      break;
1165    case RTEMS_BDBUF_STATE_CACHED:
1166      rtems_bdbuf_remove_from_tree (bd);
1167      break;
1168    default:
1169      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_10);
1170  }
1171
1172  rtems_chain_extract_unprotected (&bd->link);
1173}
1174
static void
rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  /* Free buffers are prepended so they are found first by the LRU scan. */
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
  rtems_chain_prepend_unprotected (&bdbuf_cache.lru, &bd->link);
}
1181
static void
rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
{
  /* Mark the buffer as holding no valid data. */
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
}
1187
static void
rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  /* Cached buffers are appended, so less recently used ones stay in front. */
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
  rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
}
1194
1195static void
1196rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
1197{
1198  rtems_bdbuf_make_empty (bd);
1199
1200  if (bd->waiters == 0)
1201  {
1202    rtems_bdbuf_remove_from_tree (bd);
1203    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1204  }
1205}
1206
static void
rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
{
  /*
   * A sync is active for this buffer's device: wait until it has finished by
   * taking and immediately releasing the sync lock before the buffer becomes
   * modified again.  The cache lock is dropped while blocking on the sync
   * lock and re-acquired afterwards.
   */
  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dd)
  {
    rtems_bdbuf_unlock_cache ();

    /*
     * Wait for the sync lock.
     */
    rtems_bdbuf_lock_sync ();

    rtems_bdbuf_unlock_sync ();
    rtems_bdbuf_lock_cache ();
  }

  /*
   * Only the first modified release sets the timer and any further user
   * accesses do not change the timer value which should move down. This
   * assumes the user's hold of the buffer is much less than the time on the
   * modified list. Resetting the timer on each access could result in a
   * buffer never getting to 0 and never being forced onto disk. This raises a
   * difficult question. Is a snapshot of a block that is changing better than
   * nothing being written? We have tended to think we should hold changes for
   * only a specific period of time even if still changing and get onto disk
   * and let the file system try and recover this position if it can.
   */
  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
    bd->hold_timer = bdbuf_config.swap_block_hold;

  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
  rtems_chain_append_unprotected (&bdbuf_cache.modified, &bd->link);

  /*
   * Wake tasks waiting to access this buffer, or, if tasks are starved for
   * free buffers, kick the swapper so modified buffers get written out.
   */
  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
  else if (rtems_bdbuf_has_buffer_waiters ())
    rtems_bdbuf_wake_swapper ();
}
1246
1247static void
1248rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
1249{
1250  rtems_bdbuf_group_release (bd);
1251  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
1252
1253  if (bd->waiters)
1254    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1255  else
1256    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1257}
1258
1259/**
1260 * Compute the number of BDs per group for a given buffer size.
1261 *
1262 * @param size The buffer size. It can be any size and we scale up.
1263 */
1264static size_t
1265rtems_bdbuf_bds_per_group (size_t size)
1266{
1267  size_t bufs_per_size;
1268  size_t bds_per_size;
1269
1270  if (size > bdbuf_config.buffer_max)
1271    return 0;
1272
1273  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1274
1275  for (bds_per_size = 1;
1276       bds_per_size < bufs_per_size;
1277       bds_per_size <<= 1)
1278    ;
1279
1280  return bdbuf_cache.max_bds_per_group / bds_per_size;
1281}
1282
1283static void
1284rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
1285{
1286  rtems_bdbuf_group_release (bd);
1287  rtems_bdbuf_discard_buffer (bd);
1288
1289  if (bd->waiters)
1290    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1291  else
1292    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1293}
1294
1295/**
1296 * Reallocate a group. The BDs currently allocated in the group are removed
1297 * from the ALV tree and any lists then the new BD's are prepended to the ready
1298 * list of the cache.
1299 *
1300 * @param group The group to reallocate.
1301 * @param new_bds_per_group The new count of BDs per group.
1302 * @return A buffer of this group.
1303 */
1304static rtems_bdbuf_buffer *
1305rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1306{
1307  rtems_bdbuf_buffer* bd;
1308  size_t              b;
1309  size_t              bufs_per_bd;
1310
1311  if (rtems_bdbuf_tracer)
1312    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1313            group - bdbuf_cache.groups, group->bds_per_group,
1314            new_bds_per_group);
1315
1316  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1317
1318  for (b = 0, bd = group->bdbuf;
1319       b < group->bds_per_group;
1320       b++, bd += bufs_per_bd)
1321    rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1322
1323  group->bds_per_group = new_bds_per_group;
1324  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1325
1326  for (b = 1, bd = group->bdbuf + bufs_per_bd;
1327       b < group->bds_per_group;
1328       b++, bd += bufs_per_bd)
1329    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1330
1331  if (b > 1)
1332    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1333
1334  return group->bdbuf;
1335}
1336
static void
rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
                                rtems_disk_device  *dd,
                                rtems_blkdev_bnum   block)
{
  /*
   * Bind the buffer to the device and block, reset its AVL links and waiter
   * count, index it in the cache tree and mark it EMPTY.  A failed insert
   * means a duplicate entry, which is fatal.
   */
  bd->dd        = dd ;
  bd->block     = block;
  bd->avl.left  = NULL;
  bd->avl.right = NULL;
  bd->waiters   = 0;

  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RECYCLE);

  rtems_bdbuf_make_empty (bd);
}
1353
1354static rtems_bdbuf_buffer *
1355rtems_bdbuf_get_buffer_from_lru_list (rtems_disk_device *dd,
1356                                      rtems_blkdev_bnum  block)
1357{
1358  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);
1359
1360  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
1361  {
1362    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
1363    rtems_bdbuf_buffer *empty_bd = NULL;
1364
1365    if (rtems_bdbuf_tracer)
1366      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
1367              bd - bdbuf_cache.bds,
1368              bd->group - bdbuf_cache.groups, bd->group->users,
1369              bd->group->bds_per_group, dd->bds_per_group);
1370
1371    /*
1372     * If nobody waits for this BD, we may recycle it.
1373     */
1374    if (bd->waiters == 0)
1375    {
1376      if (bd->group->bds_per_group == dd->bds_per_group)
1377      {
1378        rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1379
1380        empty_bd = bd;
1381      }
1382      else if (bd->group->users == 0)
1383        empty_bd = rtems_bdbuf_group_realloc (bd->group, dd->bds_per_group);
1384    }
1385
1386    if (empty_bd != NULL)
1387    {
1388      rtems_bdbuf_setup_empty_buffer (empty_bd, dd, block);
1389
1390      return empty_bd;
1391    }
1392
1393    node = rtems_chain_next (node);
1394  }
1395
1396  return NULL;
1397}
1398
1399static rtems_status_code
1400rtems_bdbuf_create_task(
1401  rtems_name name,
1402  rtems_task_priority priority,
1403  rtems_task_priority default_priority,
1404  rtems_id *id
1405)
1406{
1407  rtems_status_code sc;
1408  size_t stack_size = bdbuf_config.task_stack_size ?
1409    bdbuf_config.task_stack_size : RTEMS_BDBUF_TASK_STACK_SIZE_DEFAULT;
1410
1411  priority = priority != 0 ? priority : default_priority;
1412
1413  sc = rtems_task_create (name,
1414                          priority,
1415                          stack_size,
1416                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1417                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1418                          id);
1419
1420  return sc;
1421}
1422
1423static rtems_bdbuf_swapout_transfer*
1424rtems_bdbuf_swapout_transfer_alloc (void)
1425{
1426  /*
1427   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
1428   * I am disappointment at finding code like this in RTEMS. The request should
1429   * have been a rtems_chain_control. Simple, fast and less storage as the node
1430   * is already part of the buffer structure.
1431   */
1432  size_t transfer_size = sizeof (rtems_bdbuf_swapout_transfer)
1433    + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
1434  return calloc (1, transfer_size);
1435}
1436
1437static void
1438rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status);
1439
static void
rtems_bdbuf_swapout_transfer_init (rtems_bdbuf_swapout_transfer* transfer,
                                   rtems_id id)
{
  /*
   * Prepare a transfer record for use: empty BD list, no device, not syncing,
   * and a write request whose completion callback wakes the given task (see
   * rtems_bdbuf_transfer_done()).
   */
  rtems_chain_initialize_empty (&transfer->bds);
  transfer->dd = BDBUF_INVALID_DEV;
  transfer->syncing = false;
  transfer->write_req.req = RTEMS_BLKDEV_REQ_WRITE;
  transfer->write_req.done = rtems_bdbuf_transfer_done;
  transfer->write_req.io_task = id;
}
1451
static size_t
rtems_bdbuf_swapout_worker_size (void)
{
  /* Worker structure plus the trailing scatter/gather array of its embedded
     write request. */
  return sizeof (rtems_bdbuf_swapout_worker)
    + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
}
1458
1459static rtems_task
1460rtems_bdbuf_swapout_worker_task (rtems_task_argument arg);
1461
static rtems_status_code
rtems_bdbuf_swapout_workers_create (void)
{
  rtems_status_code  sc;
  size_t             w;
  size_t             worker_size;
  char              *worker_current;

  /*
   * All workers are allocated as one contiguous, zeroed array.  Each element
   * carries a trailing scatter/gather array, so explicit byte-size pointer
   * arithmetic is used to step through it.
   */
  worker_size = rtems_bdbuf_swapout_worker_size ();
  worker_current = calloc (1, bdbuf_config.swapout_workers * worker_size);
  sc = worker_current != NULL ? RTEMS_SUCCESSFUL : RTEMS_NO_MEMORY;

  bdbuf_cache.swapout_workers = (rtems_bdbuf_swapout_worker *) worker_current;

  /* Create and start one task per configured worker; stop on first error. */
  for (w = 0;
       sc == RTEMS_SUCCESSFUL && w < bdbuf_config.swapout_workers;
       w++, worker_current += worker_size)
  {
    rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;

    /* Worker task names are 'BDoa', 'BDob', ... to tell them apart. */
    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'D', 'o', 'a' + w),
                                  bdbuf_config.swapout_worker_priority,
                                  RTEMS_BDBUF_SWAPOUT_WORKER_TASK_PRIORITY_DEFAULT,
                                  &worker->id);
    if (sc == RTEMS_SUCCESSFUL)
    {
      rtems_bdbuf_swapout_transfer_init (&worker->transfer, worker->id);

      rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);
      worker->enabled = true;

      sc = rtems_task_start (worker->id,
                             rtems_bdbuf_swapout_worker_task,
                             (rtems_task_argument) worker);
    }
  }

  return sc;
}
1501
static size_t
rtems_bdbuf_read_request_size (uint32_t transfer_count)
{
  /* Request header plus one scatter/gather entry per transferred block. */
  return sizeof (rtems_blkdev_request)
    + sizeof (rtems_blkdev_sg_buffer) * transfer_count;
}
1508
static rtems_status_code
rtems_bdbuf_do_init (void)
{
  rtems_bdbuf_group*  group;
  rtems_bdbuf_buffer* bd;
  uint8_t*            buffer;
  size_t              b;
  size_t              cache_aligment;
  rtems_status_code   sc;

  if (rtems_bdbuf_tracer)
    printf ("bdbuf:init\n");

  if (rtems_interrupt_is_in_progress())
    return RTEMS_CALLED_FROM_ISR;

  /*
   * Check the configuration table values.
   */

  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
    return RTEMS_INVALID_NUMBER;

  /* The read-ahead request must fit comfortably on a task stack. */
  if (rtems_bdbuf_read_request_size (bdbuf_config.max_read_ahead_blocks)
      > RTEMS_MINIMUM_STACK_SIZE / 8U)
    return RTEMS_INVALID_NUMBER;

  /*
   * For unspecified cache alignments we use the CPU alignment.
   */
  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
  if (cache_aligment <= 0)
    cache_aligment = CPU_ALIGNMENT;

  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;

  rtems_chain_initialize_empty (&bdbuf_cache.swapout_free_workers);
  rtems_chain_initialize_empty (&bdbuf_cache.lru);
  rtems_chain_initialize_empty (&bdbuf_cache.modified);
  rtems_chain_initialize_empty (&bdbuf_cache.sync);
  rtems_chain_initialize_empty (&bdbuf_cache.read_ahead_chain);

  /*
   * Create the locks for the cache.  The cache lock is held from here until
   * the end of initialization (or the error path) to serialize access.
   */

  sc = rtems_bdbuf_lock_create (rtems_build_name ('B', 'D', 'C', 'l'),
                                &bdbuf_cache.lock);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  rtems_bdbuf_lock_cache ();

  sc = rtems_bdbuf_lock_create (rtems_build_name ('B', 'D', 'C', 's'),
                                &bdbuf_cache.sync_lock);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_bdbuf_waiter_create (rtems_build_name ('B', 'D', 'C', 'a'),
                                  &bdbuf_cache.access_waiters);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_bdbuf_waiter_create (rtems_build_name ('B', 'D', 'C', 't'),
                                  &bdbuf_cache.transfer_waiters);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  sc = rtems_bdbuf_waiter_create (rtems_build_name ('B', 'D', 'C', 'b'),
                                  &bdbuf_cache.buffer_waiters);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  /*
   * Compute the various number of elements in the cache.
   */
  bdbuf_cache.buffer_min_count =
    bdbuf_config.size / bdbuf_config.buffer_min;
  bdbuf_cache.max_bds_per_group =
    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
  bdbuf_cache.group_count =
    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;

  /*
   * Allocate the memory for the buffer descriptors.
   */
  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
                            bdbuf_cache.buffer_min_count);
  if (!bdbuf_cache.bds)
    goto error;

  /*
   * Allocate the memory for the buffer groups.
   */
  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
                               bdbuf_cache.group_count);
  if (!bdbuf_cache.groups)
    goto error;

  /*
   * Allocate memory for buffer memory. The buffer memory will be cache
   * aligned. It is possible to free the memory allocated by rtems_memalign()
   * with free(); rtems_memalign() returns 0 on success.
   */
  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
                      cache_aligment,
                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
    goto error;

  /*
   * The cache is empty after opening so we need to add all the buffers to it
   * and initialise the groups.
   */
  for (b = 0, group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
       b < bdbuf_cache.buffer_min_count;
       b++, bd++, buffer += bdbuf_config.buffer_min)
  {
    bd->dd    = BDBUF_INVALID_DEV;
    bd->group  = group;
    bd->buffer = buffer;

    rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);

    /* Advance to the next group after one group's worth of BDs. */
    if ((b % bdbuf_cache.max_bds_per_group) ==
        (bdbuf_cache.max_bds_per_group - 1))
      group++;
  }

  /* Each group starts at maximum geometry and points at its first BD. */
  for (b = 0,
         group = bdbuf_cache.groups,
         bd = bdbuf_cache.bds;
       b < bdbuf_cache.group_count;
       b++,
         group++,
         bd += bdbuf_cache.max_bds_per_group)
  {
    group->bds_per_group = bdbuf_cache.max_bds_per_group;
    group->bdbuf = bd;
  }

  /*
   * Create and start swapout task.
   */

  bdbuf_cache.swapout_transfer = rtems_bdbuf_swapout_transfer_alloc ();
  if (!bdbuf_cache.swapout_transfer)
    goto error;

  bdbuf_cache.swapout_enabled = true;

  sc = rtems_bdbuf_create_task (rtems_build_name('B', 'S', 'W', 'P'),
                                bdbuf_config.swapout_priority,
                                RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
                                &bdbuf_cache.swapout);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  rtems_bdbuf_swapout_transfer_init (bdbuf_cache.swapout_transfer, bdbuf_cache.swapout);

  sc = rtems_task_start (bdbuf_cache.swapout,
                         rtems_bdbuf_swapout_task,
                         (rtems_task_argument) bdbuf_cache.swapout_transfer);
  if (sc != RTEMS_SUCCESSFUL)
    goto error;

  /* Optional swapout worker pool. */
  if (bdbuf_config.swapout_workers > 0)
  {
    sc = rtems_bdbuf_swapout_workers_create ();
    if (sc != RTEMS_SUCCESSFUL)
      goto error;
  }

  /* Optional read-ahead task. */
  if (bdbuf_config.max_read_ahead_blocks > 0)
  {
    bdbuf_cache.read_ahead_enabled = true;
    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'R', 'D', 'A'),
                                  bdbuf_config.read_ahead_priority,
                                  RTEMS_BDBUF_READ_AHEAD_TASK_PRIORITY_DEFAULT,
                                  &bdbuf_cache.read_ahead_task);
    if (sc != RTEMS_SUCCESSFUL)
      goto error;

    sc = rtems_task_start (bdbuf_cache.read_ahead_task,
                           rtems_bdbuf_read_ahead_task,
                           0);
    if (sc != RTEMS_SUCCESSFUL)
      goto error;
  }

  rtems_bdbuf_unlock_cache ();

  return RTEMS_SUCCESSFUL;

error:

  /*
   * Error clean up: delete any created tasks, free all allocations and
   * delete the synchronization objects.  The cache lock is still held if it
   * was created, so unlock before deleting it.
   */
  if (bdbuf_cache.read_ahead_task != 0)
    rtems_task_delete (bdbuf_cache.read_ahead_task);

  if (bdbuf_cache.swapout != 0)
    rtems_task_delete (bdbuf_cache.swapout);

  if (bdbuf_cache.swapout_workers)
  {
    char   *worker_current = (char *) bdbuf_cache.swapout_workers;
    size_t  worker_size = rtems_bdbuf_swapout_worker_size ();
    size_t  w;

    for (w = 0;
         w < bdbuf_config.swapout_workers;
         w++, worker_current += worker_size)
    {
      rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;

      if (worker->id != 0) {
        rtems_task_delete (worker->id);
      }
    }
  }

  free (bdbuf_cache.buffers);
  free (bdbuf_cache.groups);
  free (bdbuf_cache.bds);
  free (bdbuf_cache.swapout_transfer);
  free (bdbuf_cache.swapout_workers);

  rtems_bdbuf_waiter_delete (&bdbuf_cache.buffer_waiters);
  rtems_bdbuf_waiter_delete (&bdbuf_cache.access_waiters);
  rtems_bdbuf_waiter_delete (&bdbuf_cache.transfer_waiters);
  rtems_bdbuf_lock_delete (&bdbuf_cache.sync_lock);

  if (bdbuf_cache.lock != 0)
  {
    rtems_bdbuf_unlock_cache ();
    rtems_bdbuf_lock_delete (&bdbuf_cache.lock);
  }

  return RTEMS_UNSATISFIED;
}
1750
static void
rtems_bdbuf_init_once (void)
{
  /* pthread_once() adapter: record the one-time init result for callers. */
  bdbuf_cache.init_status = rtems_bdbuf_do_init();
}
1756
rtems_status_code
rtems_bdbuf_init (void)
{
  /*
   * Run the cache initialization exactly once, even with concurrent callers,
   * and return the recorded status to every caller.
   */
  int eno = pthread_once (&rtems_bdbuf_once_state, rtems_bdbuf_init_once);

  if (eno != 0)
    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_ONCE);

  return bdbuf_cache.init_status;
}
1767
1768static void
1769rtems_bdbuf_wait_for_event (rtems_event_set event)
1770{
1771  rtems_status_code sc = RTEMS_SUCCESSFUL;
1772  rtems_event_set   out = 0;
1773
1774  sc = rtems_event_receive (event,
1775                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1776                            RTEMS_NO_TIMEOUT,
1777                            &out);
1778
1779  if (sc != RTEMS_SUCCESSFUL || out != event)
1780    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_EVNT);
1781}
1782
1783static void
1784rtems_bdbuf_wait_for_transient_event (void)
1785{
1786  rtems_status_code sc = RTEMS_SUCCESSFUL;
1787
1788  sc = rtems_event_transient_receive (RTEMS_WAIT, RTEMS_NO_TIMEOUT);
1789  if (sc != RTEMS_SUCCESSFUL)
1790    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT);
1791}
1792
static void
rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
{
  /*
   * Block until the buffer may be accessed by the caller.  On return the
   * buffer has been taken off the modified/LRU list (MODIFIED/CACHED) or is
   * EMPTY.
   */
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_MODIFIED:
        /* Drop the group reference held while on the modified list. */
        rtems_bdbuf_group_release (bd);
        /* Fall through */
      case RTEMS_BDBUF_STATE_CACHED:
        /* Take the buffer off its current list. */
        rtems_chain_extract_unprotected (&bd->link);
        /* Fall through */
      case RTEMS_BDBUF_STATE_EMPTY:
        return;
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        /* Another task currently accesses the buffer; wait our turn. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
        break;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        /* The buffer takes part in a device transfer; wait for completion. */
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_7);
    }
  }
}
1824
static void
rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
{
  /* Move the buffer from the modified list to the sync list and tell the
     swapout task to write it out. */
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
  rtems_chain_extract_unprotected (&bd->link);
  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
  rtems_bdbuf_wake_swapper ();
}
1833
1834/**
1835 * @brief Waits until the buffer is ready for recycling.
1836 *
1837 * @retval @c true Buffer is valid and may be recycled.
1838 * @retval @c false Buffer is invalid and has to searched again.
1839 */
1840static bool
1841rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
1842{
1843  while (true)
1844  {
1845    switch (bd->state)
1846    {
1847      case RTEMS_BDBUF_STATE_FREE:
1848        return true;
1849      case RTEMS_BDBUF_STATE_MODIFIED:
1850        rtems_bdbuf_request_sync_for_modified_buffer (bd);
1851        break;
1852      case RTEMS_BDBUF_STATE_CACHED:
1853      case RTEMS_BDBUF_STATE_EMPTY:
1854        if (bd->waiters == 0)
1855          return true;
1856        else
1857        {
1858          /*
1859           * It is essential that we wait here without a special wait count and
1860           * without the group in use.  Otherwise we could trigger a wait ping
1861           * pong with another recycle waiter.  The state of the buffer is
1862           * arbitrary afterwards.
1863           */
1864          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1865          return false;
1866        }
1867      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1868      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1869      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1870      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1871        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1872        break;
1873      case RTEMS_BDBUF_STATE_SYNC:
1874      case RTEMS_BDBUF_STATE_TRANSFER:
1875      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1876        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1877        break;
1878      default:
1879        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_8);
1880    }
1881  }
1882}
1883
static void
rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
{
  /*
   * Block until the buffer has left the sync/transfer states.  All the other
   * listed states mean the write-out has completed one way or another.
   */
  while (true)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_EMPTY:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        return;
      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_9);
    }
  }
}
1909
static void
rtems_bdbuf_wait_for_buffer (void)
{
  /* If dirty data exists, kick the swapper so buffers may become free, then
     wait until one is released. */
  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
    rtems_bdbuf_wake_swapper ();

  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
}
1918
static void
rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
{
  /* Queue the buffer for write-out and wait for the transfer to finish. */
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);

  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);

  if (bd->waiters)
    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_wait_for_sync_done (bd);

  /*
   * We may have created a cached or empty buffer which may be recycled.
   */
  if (bd->waiters == 0
        && (bd->state == RTEMS_BDBUF_STATE_CACHED
          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
  {
    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
    {
      /* An empty buffer holds no data worth keeping; free it for reuse. */
      rtems_bdbuf_remove_from_tree (bd);
      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
    }
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
  }
}
1947
1948static rtems_bdbuf_buffer *
1949rtems_bdbuf_get_buffer_for_read_ahead (rtems_disk_device *dd,
1950                                       rtems_blkdev_bnum  block)
1951{
1952  rtems_bdbuf_buffer *bd = NULL;
1953
1954  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1955
1956  if (bd == NULL)
1957  {
1958    bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);
1959
1960    if (bd != NULL)
1961      rtems_bdbuf_group_obtain (bd);
1962  }
1963  else
1964    /*
1965     * The buffer is in the cache.  So it is already available or in use, and
1966     * thus no need for a read ahead.
1967     */
1968    bd = NULL;
1969
1970  return bd;
1971}
1972
1973static rtems_bdbuf_buffer *
1974rtems_bdbuf_get_buffer_for_access (rtems_disk_device *dd,
1975                                   rtems_blkdev_bnum  block)
1976{
1977  rtems_bdbuf_buffer *bd = NULL;
1978
1979  do
1980  {
1981    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1982
1983    if (bd != NULL)
1984    {
1985      if (bd->group->bds_per_group != dd->bds_per_group)
1986      {
1987        if (rtems_bdbuf_wait_for_recycle (bd))
1988        {
1989          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1990          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1991          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1992        }
1993        bd = NULL;
1994      }
1995    }
1996    else
1997    {
1998      bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);
1999
2000      if (bd == NULL)
2001        rtems_bdbuf_wait_for_buffer ();
2002    }
2003  }
2004  while (bd == NULL);
2005
2006  rtems_bdbuf_wait_for_access (bd);
2007  rtems_bdbuf_group_obtain (bd);
2008
2009  return bd;
2010}
2011
2012static rtems_status_code
2013rtems_bdbuf_get_media_block (const rtems_disk_device *dd,
2014                             rtems_blkdev_bnum        block,
2015                             rtems_blkdev_bnum       *media_block_ptr)
2016{
2017  rtems_status_code sc = RTEMS_SUCCESSFUL;
2018
2019  if (block < dd->block_count)
2020  {
2021    /*
2022     * Compute the media block number. Drivers work with media block number not
2023     * the block number a BD may have as this depends on the block size set by
2024     * the user.
2025     */
2026    *media_block_ptr = rtems_bdbuf_media_block (dd, block) + dd->start;
2027  }
2028  else
2029  {
2030    sc = RTEMS_INVALID_ID;
2031  }
2032
2033  return sc;
2034}
2035
rtems_status_code
rtems_bdbuf_get (rtems_disk_device   *dd,
                 rtems_blkdev_bnum    block,
                 rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code   sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer *bd = NULL;
  rtems_blkdev_bnum   media_block;

  rtems_bdbuf_lock_cache ();

  /* Translate the logical block to a media block; fails for out-of-range
     blocks, in which case *bd_ptr is set to NULL below. */
  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    /*
     * Print the block index relative to the physical disk.
     */
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);

    /* Move the buffer into the matching ACCESS_* state for the caller. */
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      case RTEMS_BDBUF_STATE_EMPTY:
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
        break;
      case RTEMS_BDBUF_STATE_MODIFIED:
        /*
         * To get a modified buffer could be considered a bug in the caller
         * because you should not be getting an already modified buffer but
         * user may have modified a byte in a block then decided to seek the
         * start and write the whole block and the file system will have no
         * record of this so just gets the block to fill.
         */
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_2);
        break;
    }

    if (rtems_bdbuf_tracer)
    {
      rtems_bdbuf_show_users ("get", bd);
      rtems_bdbuf_show_usage ();
    }
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
2095
2096/**
2097 * Call back handler called by the low level driver when the transfer has
2098 * completed. This function may be invoked from interrupt handler.
2099 *
2100 * @param arg Arbitrary argument specified in block device request
2101 *            structure (in this case - pointer to the appropriate
2102 *            block device request structure).
2103 * @param status I/O completion status
2104 */
2105static void
2106rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status)
2107{
2108  req->status = status;
2109
2110  rtems_event_transient_send (req->io_task);
2111}
2112
static rtems_status_code
rtems_bdbuf_execute_transfer_request (rtems_disk_device    *dd,
                                      rtems_blkdev_request *req,
                                      bool                  cache_locked)
{
  rtems_status_code sc = RTEMS_SUCCESSFUL;
  uint32_t transfer_index = 0;
  bool wake_transfer_waiters = false;
  bool wake_buffer_waiters = false;

  /* The cache lock must not be held across the driver call; on exit the
     lock state is restored to what the caller passed in. */
  if (cache_locked)
    rtems_bdbuf_unlock_cache ();

  /* The return value will be ignored for transfer requests */
  dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);

  /* Wait for transfer request completion; the driver reports the result via
     the done callback into req->status. */
  rtems_bdbuf_wait_for_transient_event ();
  sc = req->status;

  rtems_bdbuf_lock_cache ();

  /* Statistics */
  if (req->req == RTEMS_BLKDEV_REQ_READ)
  {
    dd->stats.read_blocks += req->bufnum;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.read_errors;
  }
  else
  {
    dd->stats.write_blocks += req->bufnum;
    ++dd->stats.write_transfers;
    if (sc != RTEMS_SUCCESSFUL)
      ++dd->stats.write_errors;
  }

  /* Release every buffer of the request: successful TRANSFER buffers become
     cached, everything else is discarded. */
  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
  {
    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
    bool waiters = bd->waiters;

    if (waiters)
      wake_transfer_waiters = true;
    else
      wake_buffer_waiters = true;

    rtems_bdbuf_group_release (bd);

    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
    else
      rtems_bdbuf_discard_buffer (bd);

    if (rtems_bdbuf_tracer)
      rtems_bdbuf_show_users ("transfer", bd);
  }

  if (wake_transfer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);

  if (wake_buffer_waiters)
    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);

  if (!cache_locked)
    rtems_bdbuf_unlock_cache ();

  /* Pass SUCCESSFUL and UNSATISFIED through; map any other driver status to
     a generic I/O error. */
  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
    return sc;
  else
    return RTEMS_IO_ERROR;
}
2185
2186static rtems_status_code
2187rtems_bdbuf_execute_read_request (rtems_disk_device  *dd,
2188                                  rtems_bdbuf_buffer *bd,
2189                                  uint32_t            transfer_count)
2190{
2191  rtems_blkdev_request *req = NULL;
2192  rtems_blkdev_bnum media_block = bd->block;
2193  uint32_t media_blocks_per_block = dd->media_blocks_per_block;
2194  uint32_t block_size = dd->block_size;
2195  uint32_t transfer_index = 1;
2196
2197  /*
2198   * TODO: This type of request structure is wrong and should be removed.
2199   */
2200#define bdbuf_alloc(size) __builtin_alloca (size)
2201
2202  req = bdbuf_alloc (rtems_bdbuf_read_request_size (transfer_count));
2203
2204  req->req = RTEMS_BLKDEV_REQ_READ;
2205  req->done = rtems_bdbuf_transfer_done;
2206  req->io_task = rtems_task_self ();
2207  req->bufnum = 0;
2208
2209  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2210
2211  req->bufs [0].user   = bd;
2212  req->bufs [0].block  = media_block;
2213  req->bufs [0].length = block_size;
2214  req->bufs [0].buffer = bd->buffer;
2215
2216  if (rtems_bdbuf_tracer)
2217    rtems_bdbuf_show_users ("read", bd);
2218
2219  while (transfer_index < transfer_count)
2220  {
2221    media_block += media_blocks_per_block;
2222
2223    bd = rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);
2224
2225    if (bd == NULL)
2226      break;
2227
2228    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2229
2230    req->bufs [transfer_index].user   = bd;
2231    req->bufs [transfer_index].block  = media_block;
2232    req->bufs [transfer_index].length = block_size;
2233    req->bufs [transfer_index].buffer = bd->buffer;
2234
2235    if (rtems_bdbuf_tracer)
2236      rtems_bdbuf_show_users ("read", bd);
2237
2238    ++transfer_index;
2239  }
2240
2241  req->bufnum = transfer_index;
2242
2243  return rtems_bdbuf_execute_transfer_request (dd, req, true);
2244}
2245
2246static bool
2247rtems_bdbuf_is_read_ahead_active (const rtems_disk_device *dd)
2248{
2249  return !rtems_chain_is_node_off_chain (&dd->read_ahead.node);
2250}
2251
2252static void
2253rtems_bdbuf_read_ahead_cancel (rtems_disk_device *dd)
2254{
2255  if (rtems_bdbuf_is_read_ahead_active (dd))
2256  {
2257    rtems_chain_extract_unprotected (&dd->read_ahead.node);
2258    rtems_chain_set_off_chain (&dd->read_ahead.node);
2259  }
2260}
2261
/**
 * Cancel any queued read-ahead request of the device and disarm its trigger,
 * so a later read must re-establish a sequential access pattern before
 * read-ahead fires again.
 */
static void
rtems_bdbuf_read_ahead_reset (rtems_disk_device *dd)
{
  rtems_bdbuf_read_ahead_cancel (dd);
  dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
}
2268
2269static void
2270rtems_bdbuf_check_read_ahead_trigger (rtems_disk_device *dd,
2271                                      rtems_blkdev_bnum  block)
2272{
2273  if (bdbuf_cache.read_ahead_task != 0
2274      && dd->read_ahead.trigger == block
2275      && !rtems_bdbuf_is_read_ahead_active (dd))
2276  {
2277    rtems_status_code sc;
2278    rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;
2279
2280    if (rtems_chain_is_empty (chain))
2281    {
2282      sc = rtems_event_send (bdbuf_cache.read_ahead_task,
2283                             RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
2284      if (sc != RTEMS_SUCCESSFUL)
2285        rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RA_WAKE_UP);
2286    }
2287
2288    rtems_chain_append_unprotected (chain, &dd->read_ahead.node);
2289  }
2290}
2291
2292static void
2293rtems_bdbuf_set_read_ahead_trigger (rtems_disk_device *dd,
2294                                    rtems_blkdev_bnum  block)
2295{
2296  if (dd->read_ahead.trigger != block)
2297  {
2298    rtems_bdbuf_read_ahead_cancel (dd);
2299    dd->read_ahead.trigger = block + 1;
2300    dd->read_ahead.next = block + 2;
2301  }
2302}
2303
/**
 * Get the buffer for the block and make sure it contains the data from the
 * media.  A cache hit (CACHED or MODIFIED state) is served directly; an
 * EMPTY buffer triggers a driver read which may also perform read-ahead.
 * On success the buffer is returned in an ACCESS_* state and must be given
 * back with rtems_bdbuf_release(), rtems_bdbuf_release_modified() or
 * rtems_bdbuf_sync().
 *
 * @param dd The disk device.
 * @param block The logical block number.
 * @param bd_ptr Receives the buffer, or NULL if the read failed.
 *
 * @retval RTEMS_SUCCESSFUL The buffer is available.
 * @retval other Media block conversion or transfer error.
 */
rtems_status_code
rtems_bdbuf_read (rtems_disk_device   *dd,
                  rtems_blkdev_bnum    block,
                  rtems_bdbuf_buffer **bd_ptr)
{
  rtems_status_code     sc = RTEMS_SUCCESSFUL;
  rtems_bdbuf_buffer   *bd = NULL;
  rtems_blkdev_bnum     media_block;

  rtems_bdbuf_lock_cache ();

  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
  if (sc == RTEMS_SUCCESSFUL)
  {
    if (rtems_bdbuf_tracer)
      printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
              media_block, block, (unsigned) dd->dev);

    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);
    switch (bd->state)
    {
      /* Cache hit: the data is already valid. */
      case RTEMS_BDBUF_STATE_CACHED:
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
        break;
      /* Cache hit on a not yet written back buffer. */
      case RTEMS_BDBUF_STATE_MODIFIED:
        ++dd->stats.read_hits;
        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
        break;
      /* Cache miss: fetch the block (and possibly read-ahead) from disk. */
      case RTEMS_BDBUF_STATE_EMPTY:
        ++dd->stats.read_misses;
        rtems_bdbuf_set_read_ahead_trigger (dd, block);
        sc = rtems_bdbuf_execute_read_request (dd, bd, 1);
        if (sc == RTEMS_SUCCESSFUL)
        {
          rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
          rtems_chain_extract_unprotected (&bd->link);
          rtems_bdbuf_group_obtain (bd);
        }
        else
        {
          /* The transfer failed; the buffer was discarded by the request. */
          bd = NULL;
        }
        break;
      default:
        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_4);
        break;
    }

    rtems_bdbuf_check_read_ahead_trigger (dd, block);
  }

  rtems_bdbuf_unlock_cache ();

  *bd_ptr = bd;

  return sc;
}
2362
2363static rtems_status_code
2364rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
2365{
2366  if (bd == NULL)
2367    return RTEMS_INVALID_ADDRESS;
2368  if (rtems_bdbuf_tracer)
2369  {
2370    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
2371    rtems_bdbuf_show_users (kind, bd);
2372  }
2373  rtems_bdbuf_lock_cache();
2374
2375  return RTEMS_SUCCESSFUL;
2376}
2377
2378rtems_status_code
2379rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2380{
2381  rtems_status_code sc = RTEMS_SUCCESSFUL;
2382
2383  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2384  if (sc != RTEMS_SUCCESSFUL)
2385    return sc;
2386
2387  switch (bd->state)
2388  {
2389    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2390      rtems_bdbuf_add_to_lru_list_after_access (bd);
2391      break;
2392    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2393    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2394      rtems_bdbuf_discard_buffer_after_access (bd);
2395      break;
2396    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2397      rtems_bdbuf_add_to_modified_list_after_access (bd);
2398      break;
2399    default:
2400      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_0);
2401      break;
2402  }
2403
2404  if (rtems_bdbuf_tracer)
2405    rtems_bdbuf_show_usage ();
2406
2407  rtems_bdbuf_unlock_cache ();
2408
2409  return RTEMS_SUCCESSFUL;
2410}
2411
2412rtems_status_code
2413rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2414{
2415  rtems_status_code sc = RTEMS_SUCCESSFUL;
2416
2417  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2418  if (sc != RTEMS_SUCCESSFUL)
2419    return sc;
2420
2421  switch (bd->state)
2422  {
2423    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2424    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2425    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2426      rtems_bdbuf_add_to_modified_list_after_access (bd);
2427      break;
2428    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2429      rtems_bdbuf_discard_buffer_after_access (bd);
2430      break;
2431    default:
2432      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_6);
2433      break;
2434  }
2435
2436  if (rtems_bdbuf_tracer)
2437    rtems_bdbuf_show_usage ();
2438
2439  rtems_bdbuf_unlock_cache ();
2440
2441  return RTEMS_SUCCESSFUL;
2442}
2443
2444rtems_status_code
2445rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
2446{
2447  rtems_status_code sc = RTEMS_SUCCESSFUL;
2448
2449  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2450  if (sc != RTEMS_SUCCESSFUL)
2451    return sc;
2452
2453  switch (bd->state)
2454  {
2455    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2456    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2457    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2458      rtems_bdbuf_sync_after_access (bd);
2459      break;
2460    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2461      rtems_bdbuf_discard_buffer_after_access (bd);
2462      break;
2463    default:
2464      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_5);
2465      break;
2466  }
2467
2468  if (rtems_bdbuf_tracer)
2469    rtems_bdbuf_show_usage ();
2470
2471  rtems_bdbuf_unlock_cache ();
2472
2473  return RTEMS_SUCCESSFUL;
2474}
2475
/**
 * Flush all modified buffers of the disk device to the media via the swapout
 * task and block until the swapout task signals completion with a transient
 * event.
 *
 * @param dd The disk device to synchronize.
 *
 * @retval RTEMS_SUCCESSFUL Always.
 */
rtems_status_code
rtems_bdbuf_syncdev (rtems_disk_device *dd)
{
  if (rtems_bdbuf_tracer)
    printf ("bdbuf:syncdev: %08x\n", (unsigned) dd->dev);

  /*
   * Take the sync lock before locking the cache. Once we have the sync lock we
   * can lock the cache. If another thread has the sync lock it will cause this
   * thread to block until it owns the sync lock then it can own the cache. The
   * sync lock can only be obtained with the cache unlocked.
   */
  rtems_bdbuf_lock_sync ();
  rtems_bdbuf_lock_cache ();

  /*
   * Set the cache to have a sync active for a specific device and let the swap
   * out task know the id of the requester to wake when done.
   *
   * The swap out task will negate the sync active flag when no more buffers
   * for the device are held on the "modified for sync" queues.
   */
  bdbuf_cache.sync_active    = true;
  bdbuf_cache.sync_requester = rtems_task_self ();
  bdbuf_cache.sync_device    = dd;

  rtems_bdbuf_wake_swapper ();
  rtems_bdbuf_unlock_cache ();
  /* Wait here until the swapout side sends the transient event. */
  rtems_bdbuf_wait_for_transient_event ();
  rtems_bdbuf_unlock_sync ();

  return RTEMS_SUCCESSFUL;
}
2509
2510/**
2511 * Swapout transfer to the driver. The driver will break this I/O into groups
2512 * of consecutive write requests is multiple consecutive buffers are required
2513 * by the driver. The cache is not locked.
2514 *
2515 * @param transfer The transfer transaction.
2516 */
2517static void
2518rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2519{
2520  rtems_chain_node *node;
2521
2522  if (rtems_bdbuf_tracer)
2523    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dd->dev);
2524
2525  /*
2526   * If there are buffers to transfer to the media transfer them.
2527   */
2528  if (!rtems_chain_is_empty (&transfer->bds))
2529  {
2530    /*
2531     * The last block number used when the driver only supports
2532     * continuous blocks in a single request.
2533     */
2534    uint32_t last_block = 0;
2535
2536    rtems_disk_device *dd = transfer->dd;
2537    uint32_t media_blocks_per_block = dd->media_blocks_per_block;
2538    bool need_continuous_blocks =
2539      (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) != 0;
2540
2541    /*
2542     * Take as many buffers as configured and pass to the driver. Note, the
2543     * API to the drivers has an array of buffers and if a chain was passed
2544     * we could have just passed the list. If the driver API is updated it
2545     * should be possible to make this change with little effect in this
2546     * code. The array that is passed is broken in design and should be
2547     * removed. Merging members of a struct into the first member is
2548     * trouble waiting to happen.
2549     */
2550    transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
2551    transfer->write_req.bufnum = 0;
2552
2553    while ((node = rtems_chain_get_unprotected(&transfer->bds)) != NULL)
2554    {
2555      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2556      bool                write = false;
2557
2558      /*
2559       * If the device only accepts sequential buffers and this is not the
2560       * first buffer (the first is always sequential, and the buffer is not
2561       * sequential then put the buffer back on the transfer chain and write
2562       * the committed buffers.
2563       */
2564
2565      if (rtems_bdbuf_tracer)
2566        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2567                bd->block, transfer->write_req.bufnum,
2568                need_continuous_blocks ? "MULTI" : "SCAT");
2569
2570      if (need_continuous_blocks && transfer->write_req.bufnum &&
2571          bd->block != last_block + media_blocks_per_block)
2572      {
2573        rtems_chain_prepend_unprotected (&transfer->bds, &bd->link);
2574        write = true;
2575      }
2576      else
2577      {
2578        rtems_blkdev_sg_buffer* buf;
2579        buf = &transfer->write_req.bufs[transfer->write_req.bufnum];
2580        transfer->write_req.bufnum++;
2581        buf->user   = bd;
2582        buf->block  = bd->block;
2583        buf->length = dd->block_size;
2584        buf->buffer = bd->buffer;
2585        last_block  = bd->block;
2586      }
2587
2588      /*
2589       * Perform the transfer if there are no more buffers, or the transfer
2590       * size has reached the configured max. value.
2591       */
2592
2593      if (rtems_chain_is_empty (&transfer->bds) ||
2594          (transfer->write_req.bufnum >= bdbuf_config.max_write_blocks))
2595        write = true;
2596
2597      if (write)
2598      {
2599        rtems_bdbuf_execute_transfer_request (dd, &transfer->write_req, false);
2600
2601        transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
2602        transfer->write_req.bufnum = 0;
2603      }
2604    }
2605
2606    /*
2607     * If sync'ing and the deivce is capability of handling a sync IO control
2608     * call perform the call.
2609     */
2610    if (transfer->syncing &&
2611        (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
2612    {
2613      /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
2614      /* How should the error be handled ? */
2615    }
2616  }
2617}
2618
2619/**
2620 * Process the modified list of buffers. There is a sync or modified list that
2621 * needs to be handled so we have a common function to do the work.
2622 *
2623 * @param dd_ptr Pointer to the device to handle. If BDBUF_INVALID_DEV no
2624 * device is selected so select the device of the first buffer to be written to
2625 * disk.
2626 * @param chain The modified chain to process.
2627 * @param transfer The chain to append buffers to be written too.
2628 * @param sync_active If true this is a sync operation so expire all timers.
2629 * @param update_timers If true update the timers.
2630 * @param timer_delta It update_timers is true update the timers by this
2631 *                    amount.
2632 */
2633static void
2634rtems_bdbuf_swapout_modified_processing (rtems_disk_device  **dd_ptr,
2635                                         rtems_chain_control* chain,
2636                                         rtems_chain_control* transfer,
2637                                         bool                 sync_active,
2638                                         bool                 update_timers,
2639                                         uint32_t             timer_delta)
2640{
2641  if (!rtems_chain_is_empty (chain))
2642  {
2643    rtems_chain_node* node = rtems_chain_head (chain);
2644    bool              sync_all;
2645
2646    node = node->next;
2647
2648    /*
2649     * A sync active with no valid dev means sync all.
2650     */
2651    if (sync_active && (*dd_ptr == BDBUF_INVALID_DEV))
2652      sync_all = true;
2653    else
2654      sync_all = false;
2655
2656    while (!rtems_chain_is_tail (chain, node))
2657    {
2658      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2659
2660      /*
2661       * Check if the buffer's hold timer has reached 0. If a sync is active
2662       * or someone waits for a buffer written force all the timers to 0.
2663       *
2664       * @note Lots of sync requests will skew this timer. It should be based
2665       *       on TOD to be accurate. Does it matter ?
2666       */
2667      if (sync_all || (sync_active && (*dd_ptr == bd->dd))
2668          || rtems_bdbuf_has_buffer_waiters ())
2669        bd->hold_timer = 0;
2670
2671      if (bd->hold_timer)
2672      {
2673        if (update_timers)
2674        {
2675          if (bd->hold_timer > timer_delta)
2676            bd->hold_timer -= timer_delta;
2677          else
2678            bd->hold_timer = 0;
2679        }
2680
2681        if (bd->hold_timer)
2682        {
2683          node = node->next;
2684          continue;
2685        }
2686      }
2687
2688      /*
2689       * This assumes we can set it to BDBUF_INVALID_DEV which is just an
2690       * assumption. Cannot use the transfer list being empty the sync dev
2691       * calls sets the dev to use.
2692       */
2693      if (*dd_ptr == BDBUF_INVALID_DEV)
2694        *dd_ptr = bd->dd;
2695
2696      if (bd->dd == *dd_ptr)
2697      {
2698        rtems_chain_node* next_node = node->next;
2699        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2700
2701        /*
2702         * The blocks on the transfer list are sorted in block order. This
2703         * means multi-block transfers for drivers that require consecutive
2704         * blocks perform better with sorted blocks and for real disks it may
2705         * help lower head movement.
2706         */
2707
2708        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2709
2710        rtems_chain_extract_unprotected (node);
2711
2712        tnode = tnode->previous;
2713
2714        while (node && !rtems_chain_is_head (transfer, tnode))
2715        {
2716          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2717
2718          if (bd->block > tbd->block)
2719          {
2720            rtems_chain_insert_unprotected (tnode, node);
2721            node = NULL;
2722          }
2723          else
2724            tnode = tnode->previous;
2725        }
2726
2727        if (node)
2728          rtems_chain_prepend_unprotected (transfer, node);
2729
2730        node = next_node;
2731      }
2732      else
2733      {
2734        node = node->next;
2735      }
2736    }
2737  }
2738}
2739
2740/**
2741 * Process the cache's modified buffers. Check the sync list first then the
2742 * modified list extracting the buffers suitable to be written to disk. We have
2743 * a device at a time. The task level loop will repeat this operation while
2744 * there are buffers to be written. If the transfer fails place the buffers
2745 * back on the modified list and try again later. The cache is unlocked while
2746 * the buffers are being written to disk.
2747 *
2748 * @param timer_delta It update_timers is true update the timers by this
2749 *                    amount.
2750 * @param update_timers If true update the timers.
2751 * @param transfer The transfer transaction data.
2752 *
2753 * @retval true Buffers where written to disk so scan again.
2754 * @retval false No buffers where written to disk.
2755 */
2756static bool
2757rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2758                                bool                          update_timers,
2759                                rtems_bdbuf_swapout_transfer* transfer)
2760{
2761  rtems_bdbuf_swapout_worker* worker;
2762  bool                        transfered_buffers = false;
2763
2764  rtems_bdbuf_lock_cache ();
2765
2766  /*
2767   * If a sync is active do not use a worker because the current code does not
2768   * cleaning up after. We need to know the buffers have been written when
2769   * syncing to release sync lock and currently worker threads do not return to
2770   * here. We do not know the worker is the last in a sequence of sync writes
2771   * until after we have it running so we do not know to tell it to release the
2772   * lock. The simplest solution is to get the main swap out task perform all
2773   * sync operations.
2774   */
2775  if (bdbuf_cache.sync_active)
2776    worker = NULL;
2777  else
2778  {
2779    worker = (rtems_bdbuf_swapout_worker*)
2780      rtems_chain_get_unprotected (&bdbuf_cache.swapout_free_workers);
2781    if (worker)
2782      transfer = &worker->transfer;
2783  }
2784
2785  rtems_chain_initialize_empty (&transfer->bds);
2786  transfer->dd = BDBUF_INVALID_DEV;
2787  transfer->syncing = bdbuf_cache.sync_active;
2788
2789  /*
2790   * When the sync is for a device limit the sync to that device. If the sync
2791   * is for a buffer handle process the devices in the order on the sync
2792   * list. This means the dev is BDBUF_INVALID_DEV.
2793   */
2794  if (bdbuf_cache.sync_active)
2795    transfer->dd = bdbuf_cache.sync_device;
2796
2797  /*
2798   * If we have any buffers in the sync queue move them to the modified
2799   * list. The first sync buffer will select the device we use.
2800   */
2801  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2802                                           &bdbuf_cache.sync,
2803                                           &transfer->bds,
2804                                           true, false,
2805                                           timer_delta);
2806
2807  /*
2808   * Process the cache's modified list.
2809   */
2810  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2811                                           &bdbuf_cache.modified,
2812                                           &transfer->bds,
2813                                           bdbuf_cache.sync_active,
2814                                           update_timers,
2815                                           timer_delta);
2816
2817  /*
2818   * We have all the buffers that have been modified for this device so the
2819   * cache can be unlocked because the state of each buffer has been set to
2820   * TRANSFER.
2821   */
2822  rtems_bdbuf_unlock_cache ();
2823
2824  /*
2825   * If there are buffers to transfer to the media transfer them.
2826   */
2827  if (!rtems_chain_is_empty (&transfer->bds))
2828  {
2829    if (worker)
2830    {
2831      rtems_status_code sc = rtems_event_send (worker->id,
2832                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2833      if (sc != RTEMS_SUCCESSFUL)
2834        rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_2);
2835    }
2836    else
2837    {
2838      rtems_bdbuf_swapout_write (transfer);
2839    }
2840
2841    transfered_buffers = true;
2842  }
2843
2844  if (bdbuf_cache.sync_active && !transfered_buffers)
2845  {
2846    rtems_id sync_requester;
2847    rtems_bdbuf_lock_cache ();
2848    sync_requester = bdbuf_cache.sync_requester;
2849    bdbuf_cache.sync_active = false;
2850    bdbuf_cache.sync_requester = 0;
2851    rtems_bdbuf_unlock_cache ();
2852    if (sync_requester)
2853      rtems_event_transient_send (sync_requester);
2854  }
2855
2856  return transfered_buffers;
2857}
2858
2859/**
2860 * The swapout worker thread body.
2861 *
2862 * @param arg A pointer to the worker thread's private data.
2863 * @return rtems_task Not used.
2864 */
2865static rtems_task
2866rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2867{
2868  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2869
2870  while (worker->enabled)
2871  {
2872    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2873
2874    rtems_bdbuf_swapout_write (&worker->transfer);
2875
2876    rtems_bdbuf_lock_cache ();
2877
2878    rtems_chain_initialize_empty (&worker->transfer.bds);
2879    worker->transfer.dd = BDBUF_INVALID_DEV;
2880
2881    rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);
2882
2883    rtems_bdbuf_unlock_cache ();
2884  }
2885
2886  free (worker);
2887
2888  rtems_task_delete (RTEMS_SELF);
2889}
2890
2891/**
2892 * Close the swapout worker threads.
2893 */
2894static void
2895rtems_bdbuf_swapout_workers_close (void)
2896{
2897  rtems_chain_node* node;
2898
2899  rtems_bdbuf_lock_cache ();
2900
2901  node = rtems_chain_first (&bdbuf_cache.swapout_free_workers);
2902  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_free_workers, node))
2903  {
2904    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2905    worker->enabled = false;
2906    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2907    node = rtems_chain_next (node);
2908  }
2909
2910  rtems_bdbuf_unlock_cache ();
2911}
2912
2913/**
2914 * Body of task which takes care on flushing modified buffers to the disk.
2915 *
2916 * @param arg A pointer to the global cache data. Use the global variable and
2917 *            not this.
2918 * @return rtems_task Not used.
2919 */
2920static rtems_task
2921rtems_bdbuf_swapout_task (rtems_task_argument arg)
2922{
2923  rtems_bdbuf_swapout_transfer* transfer = (rtems_bdbuf_swapout_transfer *) arg;
2924  uint32_t                      period_in_ticks;
2925  const uint32_t                period_in_msecs = bdbuf_config.swapout_period;
2926  uint32_t                      timer_delta;
2927
2928  /*
2929   * Localise the period.
2930   */
2931  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2932
2933  /*
2934   * This is temporary. Needs to be changed to use the real time clock.
2935   */
2936  timer_delta = period_in_msecs;
2937
2938  while (bdbuf_cache.swapout_enabled)
2939  {
2940    rtems_event_set   out;
2941    rtems_status_code sc;
2942
2943    /*
2944     * Only update the timers once in the processing cycle.
2945     */
2946    bool update_timers = true;
2947
2948    /*
2949     * If we write buffers to any disk perform a check again. We only write a
2950     * single device at a time and the cache may have more than one device's
2951     * buffers modified waiting to be written.
2952     */
2953    bool transfered_buffers;
2954
2955    do
2956    {
2957      transfered_buffers = false;
2958
2959      /*
2960       * Extact all the buffers we find for a specific device. The device is
2961       * the first one we find on a modified list. Process the sync queue of
2962       * buffers first.
2963       */
2964      if (rtems_bdbuf_swapout_processing (timer_delta,
2965                                          update_timers,
2966                                          transfer))
2967      {
2968        transfered_buffers = true;
2969      }
2970
2971      /*
2972       * Only update the timers once.
2973       */
2974      update_timers = false;
2975    }
2976    while (transfered_buffers);
2977
2978    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2979                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2980                              period_in_ticks,
2981                              &out);
2982
2983    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2984      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SWAPOUT_RE);
2985  }
2986
2987  rtems_bdbuf_swapout_workers_close ();
2988
2989  free (transfer);
2990
2991  rtems_task_delete (RTEMS_SELF);
2992}
2993
2994static void
2995rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2996{
2997  bool wake_buffer_waiters = false;
2998  rtems_chain_node *node = NULL;
2999
3000  while ((node = rtems_chain_get_unprotected (purge_list)) != NULL)
3001  {
3002    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
3003
3004    if (bd->waiters == 0)
3005      wake_buffer_waiters = true;
3006
3007    rtems_bdbuf_discard_buffer (bd);
3008  }
3009
3010  if (wake_buffer_waiters)
3011    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
3012}
3013
/**
 * Walk the whole buffer AVL tree without recursion, using an explicit parent
 * stack, and collect every buffer belonging to the device @a dd onto the
 * purge list or mark it as purged, depending on its state.  Called with the
 * cache locked.
 *
 * @param purge_list Receives the buffers that can be discarded immediately.
 * @param dd The disk device being purged.
 */
static void
rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
                              const rtems_disk_device *dd)
{
  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer **prev = stack;
  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;

  /* Sentinel marking the bottom of the parent stack. */
  *prev = NULL;

  while (cur != NULL)
  {
    if (cur->dd == dd)
    {
      switch (cur->state)
      {
        /* Already invalid or already marked purged: nothing to do. */
        case RTEMS_BDBUF_STATE_FREE:
        case RTEMS_BDBUF_STATE_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
          break;
        case RTEMS_BDBUF_STATE_SYNC:
          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
          /* Fall through */
        case RTEMS_BDBUF_STATE_MODIFIED:
          rtems_bdbuf_group_release (cur);
          /* Fall through */
        case RTEMS_BDBUF_STATE_CACHED:
          /* Not held by anyone: move it to the purge list for discarding. */
          rtems_chain_extract_unprotected (&cur->link);
          rtems_chain_append_unprotected (purge_list, &cur->link);
          break;
        /* In use by a transfer or a holder: only mark it; the owner discards
           it when it gives the buffer back. */
        case RTEMS_BDBUF_STATE_TRANSFER:
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
          break;
        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
          break;
        default:
          rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_STATE_11);
      }
    }

    if (cur->avl.left != NULL)
    {
      /* Left */
      ++prev;
      *prev = cur;
      cur = cur->avl.left;
    }
    else if (cur->avl.right != NULL)
    {
      /* Right */
      ++prev;
      *prev = cur;
      cur = cur->avl.right;
    }
    else
    {
      /* Leaf: climb back up past fully visited subtrees. */
      while (*prev != NULL
             && (cur == (*prev)->avl.right || (*prev)->avl.right == NULL))
      {
        /* Up */
        cur = *prev;
        --prev;
      }
      if (*prev != NULL)
        /* Right */
        cur = (*prev)->avl.right;
      else
        /* Finished */
        cur = NULL;
    }
  }
}
3090
/**
 * Purge all buffers of the device: reset its read-ahead state, gather its
 * buffers from the AVL tree and discard the collectable ones.  Called with
 * the cache locked.
 */
static void
rtems_bdbuf_do_purge_dev (rtems_disk_device *dd)
{
  rtems_chain_control purge_list;

  rtems_chain_initialize_empty (&purge_list);
  rtems_bdbuf_read_ahead_reset (dd);
  rtems_bdbuf_gather_for_purge (&purge_list, dd);
  rtems_bdbuf_purge_list (&purge_list);
}
3101
/**
 * Purge all buffers of the device under the cache lock.
 *
 * @param dd The disk device to purge.
 */
void
rtems_bdbuf_purge_dev (rtems_disk_device *dd)
{
  rtems_bdbuf_lock_cache ();
  rtems_bdbuf_do_purge_dev (dd);
  rtems_bdbuf_unlock_cache ();
}
3109
3110rtems_status_code
3111rtems_bdbuf_set_block_size (rtems_disk_device *dd,
3112                            uint32_t           block_size,
3113                            bool               sync)
3114{
3115  rtems_status_code sc = RTEMS_SUCCESSFUL;
3116
3117  /*
3118   * We do not care about the synchronization status since we will purge the
3119   * device later.
3120   */
3121  if (sync)
3122    rtems_bdbuf_syncdev (dd);
3123
3124  rtems_bdbuf_lock_cache ();
3125
3126  if (block_size > 0)
3127  {
3128    size_t bds_per_group = rtems_bdbuf_bds_per_group (block_size);
3129
3130    if (bds_per_group != 0)
3131    {
3132      int block_to_media_block_shift = 0;
3133      uint32_t media_blocks_per_block = block_size / dd->media_block_size;
3134      uint32_t one = 1;
3135
3136      while ((one << block_to_media_block_shift) < media_blocks_per_block)
3137      {
3138        ++block_to_media_block_shift;
3139      }
3140
3141      if ((dd->media_block_size << block_to_media_block_shift) != block_size)
3142        block_to_media_block_shift = -1;
3143
3144      dd->block_size = block_size;
3145      dd->block_count = dd->size / media_blocks_per_block;
3146      dd->media_blocks_per_block = media_blocks_per_block;
3147      dd->block_to_media_block_shift = block_to_media_block_shift;
3148      dd->bds_per_group = bds_per_group;
3149
3150      rtems_bdbuf_do_purge_dev (dd);
3151    }
3152    else
3153    {
3154      sc = RTEMS_INVALID_NUMBER;
3155    }
3156  }
3157  else
3158  {
3159    sc = RTEMS_INVALID_NUMBER;
3160  }
3161
3162  rtems_bdbuf_unlock_cache ();
3163
3164  return sc;
3165}
3166
/*
 * Read-ahead worker task.  It sleeps until it is woken with the read-ahead
 * event, then drains the cache's read-ahead chain and issues one read
 * transfer per queued device.  The task exits (deletes itself) once
 * read_ahead_enabled is cleared.
 */
static rtems_task
rtems_bdbuf_read_ahead_task (rtems_task_argument arg)
{
  rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;

  while (bdbuf_cache.read_ahead_enabled)
  {
    rtems_chain_node *node;

    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
    rtems_bdbuf_lock_cache ();

    /* Process every device queued for read-ahead; the chain and the
       per-device state are protected by the cache lock. */
    while ((node = rtems_chain_get_unprotected (chain)) != NULL)
    {
      rtems_disk_device *dd =
        RTEMS_CONTAINER_OF (node, rtems_disk_device, read_ahead.node);
      rtems_blkdev_bnum block = dd->read_ahead.next;
      rtems_blkdev_bnum media_block = 0;
      rtems_status_code sc =
        rtems_bdbuf_get_media_block (dd, block, &media_block);

      /* Mark the node off-chain so the device can be queued again later */
      rtems_chain_set_off_chain (&dd->read_ahead.node);

      if (sc == RTEMS_SUCCESSFUL)
      {
        rtems_bdbuf_buffer *bd =
          rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);

        /* bd == NULL means the block is already cached or in transfer;
           nothing to do in that case. */
        if (bd != NULL)
        {
          uint32_t transfer_count = dd->block_count - block;
          uint32_t max_transfer_count = bdbuf_config.max_read_ahead_blocks;

          if (transfer_count >= max_transfer_count)
          {
            /* Full-size transfer: arm the next trigger half-way through the
               blocks being read so read-ahead continues ahead of the reader. */
            transfer_count = max_transfer_count;
            dd->read_ahead.trigger = block + transfer_count / 2;
            dd->read_ahead.next = block + transfer_count;
          }
          else
          {
            /* Near the end of the device: read the remainder and disable
               further read-ahead triggers. */
            dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
          }

          ++dd->stats.read_ahead_transfers;
          rtems_bdbuf_execute_read_request (dd, bd, transfer_count);
        }
      }
      else
      {
        /* Block number out of range: disable read-ahead for this device */
        dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
      }
    }

    rtems_bdbuf_unlock_cache ();
  }

  rtems_task_delete (RTEMS_SELF);
}
3226
3227void rtems_bdbuf_get_device_stats (const rtems_disk_device *dd,
3228                                   rtems_blkdev_stats      *stats)
3229{
3230  rtems_bdbuf_lock_cache ();
3231  *stats = dd->stats;
3232  rtems_bdbuf_unlock_cache ();
3233}
3234
3235void rtems_bdbuf_reset_device_stats (rtems_disk_device *dd)
3236{
3237  rtems_bdbuf_lock_cache ();
3238  memset (&dd->stats, 0, sizeof(dd->stats));
3239  rtems_bdbuf_unlock_cache ();
3240}
Note: See TracBrowser for help on using the repository browser.