source: rtems/cpukit/libblock/src/bdbuf.c @ 865c307

4.115
Last change on this file since 865c307 was 865c307, checked in by Ric Claus <claus@…>, on 10/17/13 at 02:15:18

Remove double count of start offset

  • Property mode set to 100644
File size: 83.6 KB
Line 
1/**
2 * @file
3 *
4 * @ingroup rtems_bdbuf
5 *
6 * Block device buffer management.
7 */
8
9/*
10 * Disk I/O buffering
11 * Buffer managment
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
16 *         Alexander Kukuta <kam@oktet.ru>
17 *
18 * Copyright (C) 2008,2009 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
22 * Copyright (c) 2009-2012 embedded brains GmbH.
23 */
24
/**
 * Compile-time switch for the debug tracing support in this file. Set to 1 to
 * build the trace helpers (rtems_bdbuf_tracer, rtems_bdbuf_list_count(),
 * rtems_bdbuf_show_usage() and rtems_bdbuf_show_users()); with 0 the tracer
 * flag and the show functions are replaced by no-op macros.
 */
#define RTEMS_BDBUF_TRACE 0
29
30#if HAVE_CONFIG_H
31#include "config.h"
32#endif
33#include <limits.h>
34#include <errno.h>
35#include <stdio.h>
36#include <string.h>
37#include <inttypes.h>
38
39#include <rtems.h>
40#include <rtems/error.h>
41#include <rtems/malloc.h>
42
43#include "rtems/bdbuf.h"
44
/*
 * Disk device pointer value that stands for "no device".
 */
#define BDBUF_INVALID_DEV NULL

/*
 * Simpler label for the bdbuf configuration used in this file.
 */
#define bdbuf_config rtems_bdbuf_configuration
51
/**
 * The data of a swapout transfer transaction. This data is passed to a worker
 * thread to handle the write phase of the transfer.
 */
typedef struct rtems_bdbuf_swapout_transfer
{
  rtems_chain_control   bds;         /**< The transfer list of BDs. */
  rtems_disk_device    *dd;          /**< The device the transfer is for. */
  bool                  syncing;     /**< The data is being sync'ed. */
  rtems_blkdev_request  write_req;   /**< The write request. */
} rtems_bdbuf_swapout_transfer;
63
/**
 * Swapout worker thread. Workers are available to take processing from the
 * main swapout thread and handle the I/O operation.
 */
typedef struct rtems_bdbuf_swapout_worker
{
  rtems_chain_node             link;     /**< The threads sit on a chain when
                                          * idle. */
  rtems_id                     id;       /**< The id of the task so we can wake
                                          * it. */
  bool                         enabled;  /**< The worker is enabled. */
  rtems_bdbuf_swapout_transfer transfer; /**< The transfer data for this
                                          * thread. */
} rtems_bdbuf_swapout_worker;
78
/**
 * Buffer waiters synchronization. One instance exists per condition tasks can
 * wait for; see rtems_bdbuf_anonymous_wait() and rtems_bdbuf_wake().
 */
typedef struct rtems_bdbuf_waiters {
  unsigned count; /**< Number of tasks currently inside
                   * rtems_bdbuf_anonymous_wait() for this condition. */
  rtems_id sema;  /**< Semaphore the waiters block on; it is flushed to wake
                   * them. */
} rtems_bdbuf_waiters;
86
/**
 * The BD buffer cache. A single instance (bdbuf_cache) holds the complete
 * state of the cache.
 */
typedef struct rtems_bdbuf_cache
{
  rtems_id            swapout;           /**< Swapout task ID */
  bool                swapout_enabled;   /**< Swapout is only running if
                                          * enabled. Set to false to kill the
                                          * swap out task. It deletes itself. */
  rtems_chain_control swapout_free_workers; /**< The work threads for the swapout
                                             * task. */

  rtems_bdbuf_buffer* bds;               /**< Pointer to table of buffer
                                          * descriptors. */
  void*               buffers;           /**< The buffer's memory. */
  size_t              buffer_min_count;  /**< Number of minimum size buffers
                                          * that fit the buffer memory. */
  size_t              max_bds_per_group; /**< The number of BDs of minimum
                                          * buffer size that fit in a group. */
  uint32_t            flags;             /**< Configuration flags. */

  rtems_id            lock;              /**< The cache lock. It locks all
                                          * cache data, BD and lists. */
  rtems_id            sync_lock;         /**< Sync calls block writes. */
  bool                sync_active;       /**< True if a sync is active. */
  rtems_id            sync_requester;    /**< The sync requester. */
  rtems_disk_device  *sync_device;       /**< The device to sync, or
                                          * BDBUF_INVALID_DEV if this is not a
                                          * device sync. */

  rtems_bdbuf_buffer* tree;              /**< Buffer descriptor lookup AVL tree
                                          * root. There is only one. */
  rtems_chain_control lru;               /**< Least recently used list */
  rtems_chain_control modified;          /**< Modified buffers list */
  rtems_chain_control sync;              /**< Buffers to sync list */

  rtems_bdbuf_waiters access_waiters;    /**< Wait for a buffer in
                                          * ACCESS_CACHED, ACCESS_MODIFIED or
                                          * ACCESS_EMPTY
                                          * state. */
  rtems_bdbuf_waiters transfer_waiters;  /**< Wait for a buffer in TRANSFER
                                          * state. */
  rtems_bdbuf_waiters buffer_waiters;    /**< Wait for a buffer and no one is
                                          * available. */

  /* NOTE(review): presumably the transfer data used by the swapout task
   * itself; the allocation is not visible here - confirm. */
  rtems_bdbuf_swapout_transfer *swapout_transfer;
  /* NOTE(review): presumably the table of swapout worker threads; the
   * allocation is not visible here - confirm. */
  rtems_bdbuf_swapout_worker *swapout_workers;

  size_t              group_count;       /**< The number of groups. */
  rtems_bdbuf_group*  groups;            /**< The groups. */
  rtems_id            read_ahead_task;   /**< Read-ahead task */
  rtems_chain_control read_ahead_chain;  /**< Read-ahead request chain */
  bool                read_ahead_enabled; /**< Read-ahead enabled */

  bool                initialised;       /**< Initialised state. */
} rtems_bdbuf_cache;
143
/**
 * Fatal error codes of the bdbuf module. A code is passed to
 * rtems_bdbuf_fatal() either on its own or, through
 * rtems_bdbuf_fatal_with_state(), combined with a buffer state in the bits
 * above bit 15. The enumerators rely on their implicit values, so the order
 * determines the reported code.
 */
typedef enum {
  RTEMS_BDBUF_FATAL_CACHE_LOCK,
  RTEMS_BDBUF_FATAL_CACHE_UNLOCK,
  RTEMS_BDBUF_FATAL_CACHE_WAIT_2,
  RTEMS_BDBUF_FATAL_CACHE_WAIT_TO,
  RTEMS_BDBUF_FATAL_CACHE_WAKE,
  RTEMS_BDBUF_FATAL_PREEMPT_DIS,
  RTEMS_BDBUF_FATAL_PREEMPT_RST,
  RTEMS_BDBUF_FATAL_RA_WAKE_UP,
  RTEMS_BDBUF_FATAL_RECYCLE,
  RTEMS_BDBUF_FATAL_SO_WAKE_1,
  RTEMS_BDBUF_FATAL_SO_WAKE_2,
  RTEMS_BDBUF_FATAL_STATE_0,
  RTEMS_BDBUF_FATAL_STATE_2,
  RTEMS_BDBUF_FATAL_STATE_4,
  RTEMS_BDBUF_FATAL_STATE_5,
  RTEMS_BDBUF_FATAL_STATE_6,
  RTEMS_BDBUF_FATAL_STATE_7,
  RTEMS_BDBUF_FATAL_STATE_8,
  RTEMS_BDBUF_FATAL_STATE_9,
  RTEMS_BDBUF_FATAL_STATE_10,
  RTEMS_BDBUF_FATAL_STATE_11,
  RTEMS_BDBUF_FATAL_SWAPOUT_RE,
  RTEMS_BDBUF_FATAL_SYNC_LOCK,
  RTEMS_BDBUF_FATAL_SYNC_UNLOCK,
  RTEMS_BDBUF_FATAL_TREE_RM,
  RTEMS_BDBUF_FATAL_WAIT_EVNT,
  RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT
} rtems_bdbuf_fatal_code;
173
/**
 * The events used in this code. These should be system events rather than
 * application events.
 *
 * RTEMS_BDBUF_SWAPOUT_SYNC is the event rtems_bdbuf_wake_swapper() sends to
 * the swapout task. RTEMS_BDBUF_READ_AHEAD_WAKE_UP is presumably the event
 * that wakes the read-ahead task (the sender is not visible here - confirm).
 */
#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2
#define RTEMS_BDBUF_READ_AHEAD_WAKE_UP RTEMS_EVENT_1
180
/**
 * Lock semaphore attributes. This is used for locking type mutexes.
 *
 * @warning Priority inheritance is on.
 */
#define RTEMS_BDBUF_CACHE_LOCK_ATTRIBS \
  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)

/**
 * Waiter semaphore attributes.
 *
 * @warning Do not configure as inherit priority. If a driver is in the driver
 *          initialisation table this locked semaphore will have the IDLE task
 *          as the holder and a blocking task will raise the priority of the
 *          IDLE task which can cause unusual side effects.
 */
#define RTEMS_BDBUF_CACHE_WAITER_ATTRIBS \
  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)
201
/**
 * Waiter timeout handed to rtems_semaphore_obtain() in
 * rtems_bdbuf_anonymous_wait(). Set to non-zero to find some info on a waiter
 * that is waiting too long; a timeout is reported as the fatal error
 * RTEMS_BDBUF_FATAL_CACHE_WAIT_TO.
 *
 * NOTE(review): the macro is defined unconditionally on the first line below,
 * so the "#if !defined" fallback (20000000 microseconds converted to ticks)
 * cannot be selected as written. Remove the first definition to enable it.
 */
#define RTEMS_BDBUF_WAIT_TIMEOUT RTEMS_NO_TIMEOUT
#if !defined (RTEMS_BDBUF_WAIT_TIMEOUT)
#define RTEMS_BDBUF_WAIT_TIMEOUT \
  (RTEMS_MICROSECONDS_TO_TICKS (20000000))
#endif
211
/* Entry point of the swapout task; defined later in this file. */
static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);

/* Entry point of the read-ahead task; defined later in this file. */
static rtems_task rtems_bdbuf_read_ahead_task(rtems_task_argument arg);

/**
 * The Buffer Descriptor cache. This is the only instance; the helpers in this
 * file access it directly.
 */
static rtems_bdbuf_cache bdbuf_cache;
220
221#if RTEMS_BDBUF_TRACE
/**
 * If true output the trace message. Only a real variable when
 * RTEMS_BDBUF_TRACE is enabled; otherwise it is a macro that expands to 0.
 */
bool rtems_bdbuf_tracer;
226
227/**
228 * Return the number of items on the list.
229 *
230 * @param list The chain control.
231 * @return uint32_t The number of items on the list.
232 */
233uint32_t
234rtems_bdbuf_list_count (rtems_chain_control* list)
235{
236  rtems_chain_node* node = rtems_chain_first (list);
237  uint32_t          count = 0;
238  while (!rtems_chain_is_tail (list, node))
239  {
240    count++;
241    node = rtems_chain_next (node);
242  }
243  return count;
244}
245
246/**
247 * Show the usage for the bdbuf cache.
248 */
249void
250rtems_bdbuf_show_usage (void)
251{
252  uint32_t group;
253  uint32_t total = 0;
254  uint32_t val;
255
256  for (group = 0; group < bdbuf_cache.group_count; group++)
257    total += bdbuf_cache.groups[group].users;
258  printf ("bdbuf:group users=%lu", total);
259  val = rtems_bdbuf_list_count (&bdbuf_cache.lru);
260  printf (", lru=%lu", val);
261  total = val;
262  val = rtems_bdbuf_list_count (&bdbuf_cache.modified);
263  printf (", mod=%lu", val);
264  total += val;
265  val = rtems_bdbuf_list_count (&bdbuf_cache.sync);
266  printf (", sync=%lu", val);
267  total += val;
268  printf (", total=%lu\n", total);
269}
270
271/**
272 * Show the users for a group of a bd.
273 *
274 * @param where A label to show the context of output.
275 * @param bd The bd to show the users of.
276 */
277void
278rtems_bdbuf_show_users (const char* where, rtems_bdbuf_buffer* bd)
279{
280  const char* states[] =
281    { "FR", "EM", "CH", "AC", "AM", "AE", "AP", "MD", "SY", "TR", "TP" };
282
283  printf ("bdbuf:users: %15s: [%" PRIu32 " (%s)] %td:%td = %" PRIu32 " %s\n",
284          where,
285          bd->block, states[bd->state],
286          bd->group - bdbuf_cache.groups,
287          bd - bdbuf_cache.bds,
288          bd->group->users,
289          bd->group->users > 8 ? "<<<<<<<" : "");
290}
291#else
/*
 * Tracing is compiled out: the tracer flag is a constant 0 and the show
 * helpers expand to expressions that do nothing.
 */
#define rtems_bdbuf_tracer (0)
#define rtems_bdbuf_show_usage() ((void) 0)
#define rtems_bdbuf_show_users(_w, _b) ((void) 0)
295#endif
296
/**
 * The default maximum height of 32 allows for AVL trees having between
 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
 * change this compile-time constant as you wish.
 *
 * The value is the number of entries in the node stacks that
 * rtems_bdbuf_avl_insert() and rtems_bdbuf_avl_remove() use to record the path
 * from the root.
 */
#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
#endif
305
/**
 * Raise a fatal error with the bdbuf fatal source.
 *
 * @param error The fatal code handed to rtems_fatal().
 */
static void
rtems_bdbuf_fatal (rtems_fatal_code error)
{
  rtems_fatal (RTEMS_FATAL_SOURCE_BDBUF, error);
}
311
312static void
313rtems_bdbuf_fatal_with_state (rtems_bdbuf_buf_state state,
314                              rtems_bdbuf_fatal_code error)
315{
316  rtems_bdbuf_fatal ((((uint32_t) state) << 16) | error);
317}
318
319/**
320 * Searches for the node with specified dd/block.
321 *
322 * @param root pointer to the root node of the AVL-Tree
323 * @param dd disk device search key
324 * @param block block search key
325 * @retval NULL node with the specified dd/block is not found
326 * @return pointer to the node with specified dd/block
327 */
328static rtems_bdbuf_buffer *
329rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
330                        const rtems_disk_device *dd,
331                        rtems_blkdev_bnum    block)
332{
333  rtems_bdbuf_buffer* p = *root;
334
335  while ((p != NULL) && ((p->dd != dd) || (p->block != block)))
336  {
337    if (((uintptr_t) p->dd < (uintptr_t) dd)
338        || ((p->dd == dd) && (p->block < block)))
339    {
340      p = p->avl.right;
341    }
342    else
343    {
344      p = p->avl.left;
345    }
346  }
347
348  return p;
349}
350
/**
 * Inserts the specified node into the AVL tree and rebalances the tree.
 *
 * The tree is ordered by the dd pointer value first and by the block number
 * second. While descending from the root every visited node is pushed onto a
 * local stack of RTEMS_BDBUF_AVL_MAX_HEIGHT entries and the direction taken is
 * remembered in its avl.cache field (1 for right, -1 for left). The stack is
 * then unwound to update the balance factors and to rotate where required.
 *
 * @param root pointer to the root node of the AVL-Tree
 * @param node Pointer to the node to add. Its dd and block fields are the
 *             key; its avl.left, avl.right and avl.bal fields are reset here.
 * @retval 0 The node was added successfully
 * @retval -1 A node with the same dd/block is already in the tree
 */
static int
rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
                       rtems_bdbuf_buffer*  node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  /* Empty tree: the new node becomes the root and nothing needs balancing. */
  if (p == NULL)
  {
    *root = node;
    node->avl.left = NULL;
    node->avl.right = NULL;
    node->avl.bal = 0;
    return 0;
  }

  /*
   * Descend to the insertion point. Every visited node is pushed onto the
   * stack. The loop is left by the break with p being the parent of the new
   * node and q being the new node.
   */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      /* The key of p is smaller than the new key: go right. */
      p->avl.cache = 1;
      q = p->avl.right;
      if (q == NULL)
      {
        /* NOTE(review): the first assignment is redundant with the next line. */
        q = node;
        p->avl.right = q = node;
        break;
      }
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      /* The key of p is greater than the new key: go left. */
      p->avl.cache = -1;
      q = p->avl.left;
      if (q == NULL)
      {
        q = node;
        p->avl.left = q;
        break;
      }
    }
    else
    {
      /* The key is already present. */
      return -1;
    }

    p = q;
  }

  q->avl.left = q->avl.right = NULL;
  q->avl.bal = 0;
  modified = true;
  /* Step back so buf_prev refers to the stack slot of p, the new parent. */
  buf_prev--;

  /*
   * Walk back up the recorded path. In each pass p is the node whose subtree
   * on the avl.cache side has grown by one level. The pass that restores the
   * balance clears modified, but it still links the (possibly rotated)
   * subtree into its parent or the root before the loop ends.
   */
  while (modified)
  {
    if (p->avl.cache == -1)
    {
      /* The left subtree of p has grown. */
      switch (p->avl.bal)
      {
        case 1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = -1;
          break;

        case -1:
          p1 = p->avl.left;
          if (p1->avl.bal == -1) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;
            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;
            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
            p = p2;
          }
          /* p is now the root of the rotated subtree and is balanced. */
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    else
    {
      /* The right subtree of p has grown. */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = 1;
          break;

        case 1:
          p1 = p->avl.right;
          if (p1->avl.bal == 1) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;
            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;
            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
            p = p2;
          }
          /* p is now the root of the rotated subtree and is balanced. */
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    /*
     * Link the root of the processed subtree into its parent, using the
     * direction recorded during the descent, or make it the tree root if the
     * stack is exhausted.
     */
    q = p;
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;

      if (p->avl.cache == -1)
      {
        p->avl.left = q;
      }
      else
      {
        p->avl.right = q;
      }
    }
    else
    {
      *root = p;
      break;
    }
  };

  return 0;
}
531
532
/**
 * Removes the node from the tree and rebalances the tree.
 *
 * The node is located by the dd/block key taken from the node parameter. The
 * path from the root is recorded on a local stack of
 * RTEMS_BDBUF_AVL_MAX_HEIGHT entries, with the direction taken stored in the
 * avl.cache field of each visited node (1 for right, -1 for left). After the
 * node is unlinked the stack is unwound to update the balance factors and to
 * rotate where required.
 *
 * @param root Pointer to pointer to the root node
 * @param node Pointer to the node to remove
 * @retval 0 Item removed
 * @retval -1 No such item found
 */
static int
rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
                       const rtems_bdbuf_buffer* node)
{
  const rtems_disk_device *dd = node->dd;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  r;
  rtems_bdbuf_buffer*  s;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  memset (buf_stack, 0, sizeof(buf_stack));

  /*
   * Search for the key. Every visited node, including the matching one, is
   * pushed onto the stack.
   */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if (((uintptr_t) p->dd < (uintptr_t) dd)
        || ((p->dd == dd) && (p->block < block)))
    {
      p->avl.cache = 1;
      p = p->avl.right;
    }
    else if ((p->dd != dd) || (p->block != block))
    {
      p->avl.cache = -1;
      p = p->avl.left;
    }
    else
    {
      /* node found */
      break;
    }
  }

  if (p == NULL)
  {
    /* there is no such node */
    return -1;
  }

  q = p;

  /*
   * Drop the found node from the stack; the entry below it, if there is one,
   * is its parent.
   */
  buf_prev--;
  if (buf_prev > buf_stack)
  {
    p = *(buf_prev - 1);
  }
  else
  {
    p = NULL;
  }

  /* at this moment q - is a node to delete, p is q's parent */
  if (q->avl.right == NULL)
  {
    /* No right subtree: the left child, which may be NULL, replaces q. */
    r = q->avl.left;
    if (r != NULL)
    {
      r->avl.bal = 0;
    }
    q = r;
  }
  else
  {
    rtems_bdbuf_buffer **t;

    r = q->avl.right;

    if (r->avl.left == NULL)
    {
      /*
       * The right child has no left child, so it replaces q directly. It
       * takes over q's left subtree and balance and is pushed onto the stack
       * marked as "right" because its right side is the one that shrank.
       */
      r->avl.left = q->avl.left;
      r->avl.bal = q->avl.bal;
      r->avl.cache = 1;
      *buf_prev++ = q = r;
    }
    else
    {
      /*
       * Find the leftmost node s of the right subtree. The stack slot t is
       * reserved for s, which takes the place of q, and the nodes on the way
       * down are pushed with the direction "left". After the loop r is the
       * parent of s.
       */
      t = buf_prev++;
      s = r;

      while (s->avl.left != NULL)
      {
        *buf_prev++ = r = s;
        s = r->avl.left;
        r->avl.cache = -1;
      }

      /* s replaces q; the former parent of s adopts the right child of s. */
      s->avl.left = q->avl.left;
      r->avl.left = s->avl.right;
      s->avl.right = q->avl.right;
      s->avl.bal = q->avl.bal;
      s->avl.cache = 1;

      *t = q = s;
    }
  }

  /* Link the replacement, which may be NULL, into the parent or the root. */
  if (p != NULL)
  {
    if (p->avl.cache == -1)
    {
      p->avl.left = q;
    }
    else
    {
      p->avl.right = q;
    }
  }
  else
  {
    *root = q;
  }

  modified = true;

  /*
   * Unwind the stack. In each pass p is a node whose subtree on the avl.cache
   * side has lost one level. The loop stops when the stack is exhausted or
   * once a node absorbs the change without its own height changing.
   */
  while (modified)
  {
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;
    }
    else
    {
      break;
    }

    if (p->avl.cache == -1)
    {
      /* rebalance left branch */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          break;
        case  0:
          p->avl.bal = 1;
          modified = false;
          break;

        case +1:
          p1 = p->avl.right;

          if (p1->avl.bal >= 0) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;

            if (p1->avl.bal == 0)
            {
              p1->avl.bal = -1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;

            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;

            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }
    else
    {
      /* rebalance right branch */
      switch (p->avl.bal)
      {
        case +1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = -1;
          modified = false;
          break;

        case -1:
          p1 = p->avl.left;

          if (p1->avl.bal <= 0) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            if (p1->avl.bal == 0)
            {
              p1->avl.bal = 1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;

            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;

            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }

    /*
     * Link the root of the processed subtree into its parent, which stays on
     * the stack for the next pass, or make it the tree root.
     */
    if (buf_prev > buf_stack)
    {
      q = *(buf_prev - 1);

      if (q->avl.cache == -1)
      {
        q->avl.left = p;
      }
      else
      {
        q->avl.right = p;
      }
    }
    else
    {
      *root = p;
      break;
    }

  }

  return 0;
}
807
/**
 * Set the state of a buffer descriptor.
 *
 * @param bd The buffer descriptor.
 * @param state The new state.
 */
static void
rtems_bdbuf_set_state (rtems_bdbuf_buffer *bd, rtems_bdbuf_buf_state state)
{
  bd->state = state;
}
813
814static rtems_blkdev_bnum
815rtems_bdbuf_media_block (const rtems_disk_device *dd, rtems_blkdev_bnum block)
816{
817  if (dd->block_to_media_block_shift >= 0)
818    return block << dd->block_to_media_block_shift;
819  else
820    /*
821     * Change the block number for the block size to the block number for the media
822     * block size. We have to use 64bit maths. There is no short cut here.
823     */
824    return (rtems_blkdev_bnum)
825      ((((uint64_t) block) * dd->block_size) / dd->media_block_size);
826}
827
828/**
829 * Lock the mutex. A single task can nest calls.
830 *
831 * @param lock The mutex to lock.
832 * @param fatal_error_code The error code if the call fails.
833 */
834static void
835rtems_bdbuf_lock (rtems_id lock, uint32_t fatal_error_code)
836{
837  rtems_status_code sc = rtems_semaphore_obtain (lock,
838                                                 RTEMS_WAIT,
839                                                 RTEMS_NO_TIMEOUT);
840  if (sc != RTEMS_SUCCESSFUL)
841    rtems_bdbuf_fatal (fatal_error_code);
842}
843
844/**
845 * Unlock the mutex.
846 *
847 * @param lock The mutex to unlock.
848 * @param fatal_error_code The error code if the call fails.
849 */
850static void
851rtems_bdbuf_unlock (rtems_id lock, uint32_t fatal_error_code)
852{
853  rtems_status_code sc = rtems_semaphore_release (lock);
854  if (sc != RTEMS_SUCCESSFUL)
855    rtems_bdbuf_fatal (fatal_error_code);
856}
857
/**
 * Lock the cache. A single task can nest calls. A failure is reported as the
 * fatal error RTEMS_BDBUF_FATAL_CACHE_LOCK.
 */
static void
rtems_bdbuf_lock_cache (void)
{
  rtems_bdbuf_lock (bdbuf_cache.lock, RTEMS_BDBUF_FATAL_CACHE_LOCK);
}
866
/**
 * Unlock the cache. A failure is reported as the fatal error
 * RTEMS_BDBUF_FATAL_CACHE_UNLOCK.
 */
static void
rtems_bdbuf_unlock_cache (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.lock, RTEMS_BDBUF_FATAL_CACHE_UNLOCK);
}
875
/**
 * Lock the cache's sync lock. A single task can nest calls. A failure is
 * reported as the fatal error RTEMS_BDBUF_FATAL_SYNC_LOCK.
 */
static void
rtems_bdbuf_lock_sync (void)
{
  rtems_bdbuf_lock (bdbuf_cache.sync_lock, RTEMS_BDBUF_FATAL_SYNC_LOCK);
}
884
/**
 * Unlock the cache's sync lock. Any blocked writers are woken. A failure is
 * reported as the fatal error RTEMS_BDBUF_FATAL_SYNC_UNLOCK.
 */
static void
rtems_bdbuf_unlock_sync (void)
{
  rtems_bdbuf_unlock (bdbuf_cache.sync_lock,
                      RTEMS_BDBUF_FATAL_SYNC_UNLOCK);
}
894
/**
 * Add one user to the group the buffer descriptor belongs to.
 *
 * @param bd The buffer descriptor whose group gains a user.
 */
static void
rtems_bdbuf_group_obtain (rtems_bdbuf_buffer *bd)
{
  ++bd->group->users;
}
900
/**
 * Remove one user from the group the buffer descriptor belongs to. The
 * counter is decremented without a check for zero.
 *
 * @param bd The buffer descriptor whose group loses a user.
 */
static void
rtems_bdbuf_group_release (rtems_bdbuf_buffer *bd)
{
  --bd->group->users;
}
906
907static rtems_mode
908rtems_bdbuf_disable_preemption (void)
909{
910  rtems_status_code sc = RTEMS_SUCCESSFUL;
911  rtems_mode prev_mode = 0;
912
913  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);
914  if (sc != RTEMS_SUCCESSFUL)
915    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_PREEMPT_DIS);
916
917  return prev_mode;
918}
919
920static void
921rtems_bdbuf_restore_preemption (rtems_mode prev_mode)
922{
923  rtems_status_code sc = RTEMS_SUCCESSFUL;
924
925  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);
926  if (sc != RTEMS_SUCCESSFUL)
927    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_PREEMPT_RST);
928}
929
/**
 * Wait until woken. Semaphores are used so a number of tasks can wait and can
 * be woken at once. Task events would require we maintain a list of tasks to
 * be woken and this would require storage and we do not know the number of
 * tasks that could be waiting.
 *
 * While we have the cache locked we can try and claim the semaphore and
 * therefore know when we release the lock to the cache we will block until the
 * semaphore is released. This may even happen before we get to block.
 *
 * A counter is used to save the release call when no one is waiting.
 *
 * The function assumes the cache is locked on entry and it will be locked on
 * exit.
 *
 * @param waiters The waiters control to block on. Its count is incremented
 *                for the duration of the wait.
 */
static void
rtems_bdbuf_anonymous_wait (rtems_bdbuf_waiters *waiters)
{
  rtems_status_code sc;
  rtems_mode        prev_mode;

  /*
   * Indicate we are waiting.
   */
  ++waiters->count;

  /*
   * Disable preemption then unlock the cache and block.  There is no POSIX
   * condition variable in the core API so this is a work around.
   *
   * The issue is a task could preempt after the cache is unlocked because it is
   * blocking or just hits that window, and before this task has blocked on the
   * semaphore. If the preempting task flushes the queue this task will not see
   * the flush and may block for ever or until another transaction flushes this
   * semaphore.
   */
  prev_mode = rtems_bdbuf_disable_preemption ();

  /*
   * Unlock the cache, wait, and lock the cache when we return.
   */
  rtems_bdbuf_unlock_cache ();

  sc = rtems_semaphore_obtain (waiters->sema, RTEMS_WAIT, RTEMS_BDBUF_WAIT_TIMEOUT);

  if (sc == RTEMS_TIMEOUT)
    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CACHE_WAIT_TO);

  /*
   * RTEMS_UNSATISFIED is the only accepted status; every other status,
   * including a successful obtain, is fatal. NOTE(review): this is presumably
   * the status a waiter sees when rtems_bdbuf_wake() flushes the semaphore -
   * confirm against the rtems_semaphore_flush() documentation.
   */
  if (sc != RTEMS_UNSATISFIED)
    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CACHE_WAIT_2);

  rtems_bdbuf_lock_cache ();

  rtems_bdbuf_restore_preemption (prev_mode);

  --waiters->count;
}
987
/**
 * Wait on behalf of a specific buffer. For the duration of the wait the
 * buffer's group gains a user and the buffer's waiter count is raised; both
 * are undone once the wait returns.
 *
 * The cache must be locked on entry and is locked on exit, as required by
 * rtems_bdbuf_anonymous_wait().
 *
 * @param bd The buffer the caller waits for.
 * @param waiters The waiters control to block on.
 */
static void
rtems_bdbuf_wait (rtems_bdbuf_buffer *bd, rtems_bdbuf_waiters *waiters)
{
  rtems_bdbuf_group_obtain (bd);
  ++bd->waiters;
  rtems_bdbuf_anonymous_wait (waiters);
  --bd->waiters;
  rtems_bdbuf_group_release (bd);
}
997
998/**
999 * Wake a blocked resource. The resource has a counter that lets us know if
1000 * there are any waiters.
1001 */
1002static void
1003rtems_bdbuf_wake (const rtems_bdbuf_waiters *waiters)
1004{
1005  rtems_status_code sc = RTEMS_SUCCESSFUL;
1006
1007  if (waiters->count > 0)
1008  {
1009    sc = rtems_semaphore_flush (waiters->sema);
1010    if (sc != RTEMS_SUCCESSFUL)
1011      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_CACHE_WAKE);
1012  }
1013}
1014
1015static void
1016rtems_bdbuf_wake_swapper (void)
1017{
1018  rtems_status_code sc = rtems_event_send (bdbuf_cache.swapout,
1019                                           RTEMS_BDBUF_SWAPOUT_SYNC);
1020  if (sc != RTEMS_SUCCESSFUL)
1021    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_1);
1022}
1023
1024static bool
1025rtems_bdbuf_has_buffer_waiters (void)
1026{
1027  return bdbuf_cache.buffer_waiters.count;
1028}
1029
1030static void
1031rtems_bdbuf_remove_from_tree (rtems_bdbuf_buffer *bd)
1032{
1033  if (rtems_bdbuf_avl_remove (&bdbuf_cache.tree, bd) != 0)
1034    rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_TREE_RM);
1035}
1036
1037static void
1038rtems_bdbuf_remove_from_tree_and_lru_list (rtems_bdbuf_buffer *bd)
1039{
1040  switch (bd->state)
1041  {
1042    case RTEMS_BDBUF_STATE_FREE:
1043      break;
1044    case RTEMS_BDBUF_STATE_CACHED:
1045      rtems_bdbuf_remove_from_tree (bd);
1046      break;
1047    default:
1048      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_10);
1049  }
1050
1051  rtems_chain_extract_unprotected (&bd->link);
1052}
1053
/**
 * Mark a buffer descriptor as FREE and put it at the head of the LRU list.
 *
 * @param bd The buffer descriptor; it must not be linked on a chain.
 */
static void
rtems_bdbuf_make_free_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_FREE);
  rtems_chain_prepend_unprotected (&bdbuf_cache.lru, &bd->link);
}
1060
/**
 * Mark a buffer descriptor as EMPTY. Only the state changes.
 *
 * @param bd The buffer descriptor.
 */
static void
rtems_bdbuf_make_empty (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_EMPTY);
}
1066
/**
 * Mark a buffer descriptor as CACHED and put it at the tail of the LRU list.
 *
 * @param bd The buffer descriptor; it must not be linked on a chain.
 */
static void
rtems_bdbuf_make_cached_and_add_to_lru_list (rtems_bdbuf_buffer *bd)
{
  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_CACHED);
  rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
}
1073
1074static void
1075rtems_bdbuf_discard_buffer (rtems_bdbuf_buffer *bd)
1076{
1077  rtems_bdbuf_make_empty (bd);
1078
1079  if (bd->waiters == 0)
1080  {
1081    rtems_bdbuf_remove_from_tree (bd);
1082    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1083  }
1084}
1085
1086static void
1087rtems_bdbuf_add_to_modified_list_after_access (rtems_bdbuf_buffer *bd)
1088{
1089  if (bdbuf_cache.sync_active && bdbuf_cache.sync_device == bd->dd)
1090  {
1091    rtems_bdbuf_unlock_cache ();
1092
1093    /*
1094     * Wait for the sync lock.
1095     */
1096    rtems_bdbuf_lock_sync ();
1097
1098    rtems_bdbuf_unlock_sync ();
1099    rtems_bdbuf_lock_cache ();
1100  }
1101
1102  /*
1103   * Only the first modified release sets the timer and any further user
1104   * accesses do not change the timer value which should move down. This
1105   * assumes the user's hold of the buffer is much less than the time on the
1106   * modified list. Resetting the timer on each access which could result in a
1107   * buffer never getting to 0 and never being forced onto disk. This raises a
1108   * difficult question. Is a snapshot of a block that is changing better than
1109   * nothing being written? We have tended to think we should hold changes for
1110   * only a specific period of time even if still changing and get onto disk
1111   * and letting the file system try and recover this position if it can.
1112   */
1113  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_CACHED
1114        || bd->state == RTEMS_BDBUF_STATE_ACCESS_EMPTY)
1115    bd->hold_timer = bdbuf_config.swap_block_hold;
1116
1117  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_MODIFIED);
1118  rtems_chain_append_unprotected (&bdbuf_cache.modified, &bd->link);
1119
1120  if (bd->waiters)
1121    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1122  else if (rtems_bdbuf_has_buffer_waiters ())
1123    rtems_bdbuf_wake_swapper ();
1124}
1125
1126static void
1127rtems_bdbuf_add_to_lru_list_after_access (rtems_bdbuf_buffer *bd)
1128{
1129  rtems_bdbuf_group_release (bd);
1130  rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
1131
1132  if (bd->waiters)
1133    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1134  else
1135    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1136}
1137
1138/**
1139 * Compute the number of BDs per group for a given buffer size.
1140 *
1141 * @param size The buffer size. It can be any size and we scale up.
1142 */
1143static size_t
1144rtems_bdbuf_bds_per_group (size_t size)
1145{
1146  size_t bufs_per_size;
1147  size_t bds_per_size;
1148
1149  if (size > bdbuf_config.buffer_max)
1150    return 0;
1151
1152  bufs_per_size = ((size - 1) / bdbuf_config.buffer_min) + 1;
1153
1154  for (bds_per_size = 1;
1155       bds_per_size < bufs_per_size;
1156       bds_per_size <<= 1)
1157    ;
1158
1159  return bdbuf_cache.max_bds_per_group / bds_per_size;
1160}
1161
1162static void
1163rtems_bdbuf_discard_buffer_after_access (rtems_bdbuf_buffer *bd)
1164{
1165  rtems_bdbuf_group_release (bd);
1166  rtems_bdbuf_discard_buffer (bd);
1167
1168  if (bd->waiters)
1169    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1170  else
1171    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1172}
1173
1174/**
1175 * Reallocate a group. The BDs currently allocated in the group are removed
1176 * from the ALV tree and any lists then the new BD's are prepended to the ready
1177 * list of the cache.
1178 *
1179 * @param group The group to reallocate.
1180 * @param new_bds_per_group The new count of BDs per group.
1181 * @return A buffer of this group.
1182 */
1183static rtems_bdbuf_buffer *
1184rtems_bdbuf_group_realloc (rtems_bdbuf_group* group, size_t new_bds_per_group)
1185{
1186  rtems_bdbuf_buffer* bd;
1187  size_t              b;
1188  size_t              bufs_per_bd;
1189
1190  if (rtems_bdbuf_tracer)
1191    printf ("bdbuf:realloc: %tu: %zd -> %zd\n",
1192            group - bdbuf_cache.groups, group->bds_per_group,
1193            new_bds_per_group);
1194
1195  bufs_per_bd = bdbuf_cache.max_bds_per_group / group->bds_per_group;
1196
1197  for (b = 0, bd = group->bdbuf;
1198       b < group->bds_per_group;
1199       b++, bd += bufs_per_bd)
1200    rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1201
1202  group->bds_per_group = new_bds_per_group;
1203  bufs_per_bd = bdbuf_cache.max_bds_per_group / new_bds_per_group;
1204
1205  for (b = 1, bd = group->bdbuf + bufs_per_bd;
1206       b < group->bds_per_group;
1207       b++, bd += bufs_per_bd)
1208    rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1209
1210  if (b > 1)
1211    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1212
1213  return group->bdbuf;
1214}
1215
1216static void
1217rtems_bdbuf_setup_empty_buffer (rtems_bdbuf_buffer *bd,
1218                                rtems_disk_device  *dd,
1219                                rtems_blkdev_bnum   block)
1220{
1221  bd->dd        = dd ;
1222  bd->block     = block;
1223  bd->avl.left  = NULL;
1224  bd->avl.right = NULL;
1225  bd->waiters   = 0;
1226
1227  if (rtems_bdbuf_avl_insert (&bdbuf_cache.tree, bd) != 0)
1228    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RECYCLE);
1229
1230  rtems_bdbuf_make_empty (bd);
1231}
1232
1233static rtems_bdbuf_buffer *
1234rtems_bdbuf_get_buffer_from_lru_list (rtems_disk_device *dd,
1235                                      rtems_blkdev_bnum  block)
1236{
1237  rtems_chain_node *node = rtems_chain_first (&bdbuf_cache.lru);
1238
1239  while (!rtems_chain_is_tail (&bdbuf_cache.lru, node))
1240  {
1241    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
1242    rtems_bdbuf_buffer *empty_bd = NULL;
1243
1244    if (rtems_bdbuf_tracer)
1245      printf ("bdbuf:next-bd: %tu (%td:%" PRId32 ") %zd -> %zd\n",
1246              bd - bdbuf_cache.bds,
1247              bd->group - bdbuf_cache.groups, bd->group->users,
1248              bd->group->bds_per_group, dd->bds_per_group);
1249
1250    /*
1251     * If nobody waits for this BD, we may recycle it.
1252     */
1253    if (bd->waiters == 0)
1254    {
1255      if (bd->group->bds_per_group == dd->bds_per_group)
1256      {
1257        rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1258
1259        empty_bd = bd;
1260      }
1261      else if (bd->group->users == 0)
1262        empty_bd = rtems_bdbuf_group_realloc (bd->group, dd->bds_per_group);
1263    }
1264
1265    if (empty_bd != NULL)
1266    {
1267      rtems_bdbuf_setup_empty_buffer (empty_bd, dd, block);
1268
1269      return empty_bd;
1270    }
1271
1272    node = rtems_chain_next (node);
1273  }
1274
1275  return NULL;
1276}
1277
1278static rtems_status_code
1279rtems_bdbuf_create_task(
1280  rtems_name name,
1281  rtems_task_priority priority,
1282  rtems_task_priority default_priority,
1283  rtems_id *id
1284)
1285{
1286  rtems_status_code sc;
1287  size_t stack_size = bdbuf_config.task_stack_size ?
1288    bdbuf_config.task_stack_size : RTEMS_BDBUF_TASK_STACK_SIZE_DEFAULT;
1289
1290  priority = priority != 0 ? priority : default_priority;
1291
1292  sc = rtems_task_create (name,
1293                          priority,
1294                          stack_size,
1295                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1296                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1297                          id);
1298
1299  return sc;
1300}
1301
1302static rtems_bdbuf_swapout_transfer*
1303rtems_bdbuf_swapout_transfer_alloc (void)
1304{
1305  /*
1306   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
1307   * I am disappointment at finding code like this in RTEMS. The request should
1308   * have been a rtems_chain_control. Simple, fast and less storage as the node
1309   * is already part of the buffer structure.
1310   */
1311  size_t transfer_size = sizeof (rtems_bdbuf_swapout_transfer)
1312    + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
1313  return calloc (1, transfer_size);
1314}
1315
1316static void
1317rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status);
1318
1319static void
1320rtems_bdbuf_swapout_transfer_init (rtems_bdbuf_swapout_transfer* transfer,
1321                                   rtems_id id)
1322{
1323  rtems_chain_initialize_empty (&transfer->bds);
1324  transfer->dd = BDBUF_INVALID_DEV;
1325  transfer->syncing = false;
1326  transfer->write_req.req = RTEMS_BLKDEV_REQ_WRITE;
1327  transfer->write_req.done = rtems_bdbuf_transfer_done;
1328  transfer->write_req.io_task = id;
1329}
1330
1331static size_t
1332rtems_bdbuf_swapout_worker_size (void)
1333{
1334  return sizeof (rtems_bdbuf_swapout_worker)
1335    + (bdbuf_config.max_write_blocks * sizeof (rtems_blkdev_sg_buffer));
1336}
1337
1338static rtems_task
1339rtems_bdbuf_swapout_worker_task (rtems_task_argument arg);
1340
1341static rtems_status_code
1342rtems_bdbuf_swapout_workers_create (void)
1343{
1344  rtems_status_code  sc;
1345  size_t             w;
1346  size_t             worker_size;
1347  char              *worker_current;
1348
1349  worker_size = rtems_bdbuf_swapout_worker_size ();
1350  worker_current = calloc (1, bdbuf_config.swapout_workers * worker_size);
1351  sc = worker_current != NULL ? RTEMS_SUCCESSFUL : RTEMS_NO_MEMORY;
1352
1353  bdbuf_cache.swapout_workers = (rtems_bdbuf_swapout_worker *) worker_current;
1354
1355  for (w = 0;
1356       sc == RTEMS_SUCCESSFUL && w < bdbuf_config.swapout_workers;
1357       w++, worker_current += worker_size)
1358  {
1359    rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;
1360
1361    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'D', 'o', 'a' + w),
1362                                  bdbuf_config.swapout_worker_priority,
1363                                  RTEMS_BDBUF_SWAPOUT_WORKER_TASK_PRIORITY_DEFAULT,
1364                                  &worker->id);
1365    if (sc == RTEMS_SUCCESSFUL)
1366    {
1367      rtems_bdbuf_swapout_transfer_init (&worker->transfer, worker->id);
1368
1369      rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);
1370      worker->enabled = true;
1371
1372      sc = rtems_task_start (worker->id,
1373                             rtems_bdbuf_swapout_worker_task,
1374                             (rtems_task_argument) worker);
1375    }
1376  }
1377
1378  return sc;
1379}
1380
1381static size_t
1382rtems_bdbuf_read_request_size (uint32_t transfer_count)
1383{
1384  return sizeof (rtems_blkdev_request)
1385    + sizeof (rtems_blkdev_sg_buffer) * transfer_count;
1386}
1387
1388/**
1389 * Initialise the cache.
1390 *
1391 * @return rtems_status_code The initialisation status.
1392 */
1393rtems_status_code
1394rtems_bdbuf_init (void)
1395{
1396  rtems_bdbuf_group*  group;
1397  rtems_bdbuf_buffer* bd;
1398  uint8_t*            buffer;
1399  size_t              b;
1400  size_t              cache_aligment;
1401  rtems_status_code   sc;
1402  rtems_mode          prev_mode;
1403
1404  if (rtems_bdbuf_tracer)
1405    printf ("bdbuf:init\n");
1406
1407  if (rtems_interrupt_is_in_progress())
1408    return RTEMS_CALLED_FROM_ISR;
1409
1410  /*
1411   * Check the configuration table values.
1412   */
1413
1414  if ((bdbuf_config.buffer_max % bdbuf_config.buffer_min) != 0)
1415    return RTEMS_INVALID_NUMBER;
1416
1417  if (rtems_bdbuf_read_request_size (bdbuf_config.max_read_ahead_blocks)
1418      > RTEMS_MINIMUM_STACK_SIZE / 8U)
1419    return RTEMS_INVALID_NUMBER;
1420
1421  /*
1422   * We use a special variable to manage the initialisation incase we have
1423   * completing threads doing this. You may get errors if the another thread
1424   * makes a call and we have not finished initialisation.
1425   */
1426  prev_mode = rtems_bdbuf_disable_preemption ();
1427  if (bdbuf_cache.initialised)
1428  {
1429    rtems_bdbuf_restore_preemption (prev_mode);
1430    return RTEMS_RESOURCE_IN_USE;
1431  }
1432
1433  memset(&bdbuf_cache, 0, sizeof(bdbuf_cache));
1434  bdbuf_cache.initialised = true;
1435  rtems_bdbuf_restore_preemption (prev_mode);
1436
1437  /*
1438   * For unspecified cache alignments we use the CPU alignment.
1439   */
1440  cache_aligment = 32; /* FIXME rtems_cache_get_data_line_size() */
1441  if (cache_aligment <= 0)
1442    cache_aligment = CPU_ALIGNMENT;
1443
1444  bdbuf_cache.sync_device = BDBUF_INVALID_DEV;
1445
1446  rtems_chain_initialize_empty (&bdbuf_cache.swapout_free_workers);
1447  rtems_chain_initialize_empty (&bdbuf_cache.lru);
1448  rtems_chain_initialize_empty (&bdbuf_cache.modified);
1449  rtems_chain_initialize_empty (&bdbuf_cache.sync);
1450  rtems_chain_initialize_empty (&bdbuf_cache.read_ahead_chain);
1451
1452  /*
1453   * Create the locks for the cache.
1454   */
1455  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'l'),
1456                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1457                               &bdbuf_cache.lock);
1458  if (sc != RTEMS_SUCCESSFUL)
1459    goto error;
1460
1461  rtems_bdbuf_lock_cache ();
1462
1463  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 's'),
1464                               1, RTEMS_BDBUF_CACHE_LOCK_ATTRIBS, 0,
1465                               &bdbuf_cache.sync_lock);
1466  if (sc != RTEMS_SUCCESSFUL)
1467    goto error;
1468
1469  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'a'),
1470                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1471                               &bdbuf_cache.access_waiters.sema);
1472  if (sc != RTEMS_SUCCESSFUL)
1473    goto error;
1474
1475  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 't'),
1476                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1477                               &bdbuf_cache.transfer_waiters.sema);
1478  if (sc != RTEMS_SUCCESSFUL)
1479    goto error;
1480
1481  sc = rtems_semaphore_create (rtems_build_name ('B', 'D', 'C', 'b'),
1482                               0, RTEMS_BDBUF_CACHE_WAITER_ATTRIBS, 0,
1483                               &bdbuf_cache.buffer_waiters.sema);
1484  if (sc != RTEMS_SUCCESSFUL)
1485    goto error;
1486
1487  /*
1488   * Compute the various number of elements in the cache.
1489   */
1490  bdbuf_cache.buffer_min_count =
1491    bdbuf_config.size / bdbuf_config.buffer_min;
1492  bdbuf_cache.max_bds_per_group =
1493    bdbuf_config.buffer_max / bdbuf_config.buffer_min;
1494  bdbuf_cache.group_count =
1495    bdbuf_cache.buffer_min_count / bdbuf_cache.max_bds_per_group;
1496
1497  /*
1498   * Allocate the memory for the buffer descriptors.
1499   */
1500  bdbuf_cache.bds = calloc (sizeof (rtems_bdbuf_buffer),
1501                            bdbuf_cache.buffer_min_count);
1502  if (!bdbuf_cache.bds)
1503    goto error;
1504
1505  /*
1506   * Allocate the memory for the buffer descriptors.
1507   */
1508  bdbuf_cache.groups = calloc (sizeof (rtems_bdbuf_group),
1509                               bdbuf_cache.group_count);
1510  if (!bdbuf_cache.groups)
1511    goto error;
1512
1513  /*
1514   * Allocate memory for buffer memory. The buffer memory will be cache
1515   * aligned. It is possible to free the memory allocated by rtems_memalign()
1516   * with free(). Return 0 if allocated.
1517   *
1518   * The memory allocate allows a
1519   */
1520  if (rtems_memalign ((void **) &bdbuf_cache.buffers,
1521                      cache_aligment,
1522                      bdbuf_cache.buffer_min_count * bdbuf_config.buffer_min) != 0)
1523    goto error;
1524
1525  /*
1526   * The cache is empty after opening so we need to add all the buffers to it
1527   * and initialise the groups.
1528   */
1529  for (b = 0, group = bdbuf_cache.groups,
1530         bd = bdbuf_cache.bds, buffer = bdbuf_cache.buffers;
1531       b < bdbuf_cache.buffer_min_count;
1532       b++, bd++, buffer += bdbuf_config.buffer_min)
1533  {
1534    bd->dd    = BDBUF_INVALID_DEV;
1535    bd->group  = group;
1536    bd->buffer = buffer;
1537
1538    rtems_chain_append_unprotected (&bdbuf_cache.lru, &bd->link);
1539
1540    if ((b % bdbuf_cache.max_bds_per_group) ==
1541        (bdbuf_cache.max_bds_per_group - 1))
1542      group++;
1543  }
1544
1545  for (b = 0,
1546         group = bdbuf_cache.groups,
1547         bd = bdbuf_cache.bds;
1548       b < bdbuf_cache.group_count;
1549       b++,
1550         group++,
1551         bd += bdbuf_cache.max_bds_per_group)
1552  {
1553    group->bds_per_group = bdbuf_cache.max_bds_per_group;
1554    group->bdbuf = bd;
1555  }
1556
1557  /*
1558   * Create and start swapout task.
1559   */
1560
1561  bdbuf_cache.swapout_transfer = rtems_bdbuf_swapout_transfer_alloc ();
1562  if (!bdbuf_cache.swapout_transfer)
1563    goto error;
1564
1565  bdbuf_cache.swapout_enabled = true;
1566
1567  sc = rtems_bdbuf_create_task (rtems_build_name('B', 'S', 'W', 'P'),
1568                                bdbuf_config.swapout_priority,
1569                                RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT,
1570                                &bdbuf_cache.swapout);
1571  if (sc != RTEMS_SUCCESSFUL)
1572    goto error;
1573
1574  rtems_bdbuf_swapout_transfer_init (bdbuf_cache.swapout_transfer, bdbuf_cache.swapout);
1575
1576  sc = rtems_task_start (bdbuf_cache.swapout,
1577                         rtems_bdbuf_swapout_task,
1578                         (rtems_task_argument) bdbuf_cache.swapout_transfer);
1579  if (sc != RTEMS_SUCCESSFUL)
1580    goto error;
1581
1582  if (bdbuf_config.swapout_workers > 0)
1583  {
1584    sc = rtems_bdbuf_swapout_workers_create ();
1585    if (sc != RTEMS_SUCCESSFUL)
1586      goto error;
1587  }
1588
1589  if (bdbuf_config.max_read_ahead_blocks > 0)
1590  {
1591    bdbuf_cache.read_ahead_enabled = true;
1592    sc = rtems_bdbuf_create_task (rtems_build_name('B', 'R', 'D', 'A'),
1593                                  bdbuf_config.read_ahead_priority,
1594                                  RTEMS_BDBUF_READ_AHEAD_TASK_PRIORITY_DEFAULT,
1595                                  &bdbuf_cache.read_ahead_task);
1596    if (sc != RTEMS_SUCCESSFUL)
1597      goto error;
1598
1599    sc = rtems_task_start (bdbuf_cache.read_ahead_task,
1600                           rtems_bdbuf_read_ahead_task,
1601                           0);
1602    if (sc != RTEMS_SUCCESSFUL)
1603      goto error;
1604  }
1605
1606  rtems_bdbuf_unlock_cache ();
1607
1608  return RTEMS_SUCCESSFUL;
1609
1610error:
1611
1612  if (bdbuf_cache.read_ahead_task != 0)
1613    rtems_task_delete (bdbuf_cache.read_ahead_task);
1614
1615  if (bdbuf_cache.swapout != 0)
1616    rtems_task_delete (bdbuf_cache.swapout);
1617
1618  if (bdbuf_cache.swapout_workers)
1619  {
1620    char   *worker_current = (char *) bdbuf_cache.swapout_workers;
1621    size_t  worker_size = rtems_bdbuf_swapout_worker_size ();
1622    size_t  w;
1623
1624    for (w = 0;
1625         w < bdbuf_config.swapout_workers;
1626         w++, worker_current += worker_size)
1627    {
1628      rtems_bdbuf_swapout_worker *worker = (rtems_bdbuf_swapout_worker *) worker_current;
1629
1630      if (worker->id != 0) {
1631        rtems_task_delete (worker->id);
1632      }
1633    }
1634  }
1635
1636  free (bdbuf_cache.buffers);
1637  free (bdbuf_cache.groups);
1638  free (bdbuf_cache.bds);
1639  free (bdbuf_cache.swapout_transfer);
1640  free (bdbuf_cache.swapout_workers);
1641
1642  rtems_semaphore_delete (bdbuf_cache.buffer_waiters.sema);
1643  rtems_semaphore_delete (bdbuf_cache.access_waiters.sema);
1644  rtems_semaphore_delete (bdbuf_cache.transfer_waiters.sema);
1645  rtems_semaphore_delete (bdbuf_cache.sync_lock);
1646
1647  if (bdbuf_cache.lock != 0)
1648  {
1649    rtems_bdbuf_unlock_cache ();
1650    rtems_semaphore_delete (bdbuf_cache.lock);
1651  }
1652
1653  bdbuf_cache.initialised = false;
1654
1655  return RTEMS_UNSATISFIED;
1656}
1657
1658static void
1659rtems_bdbuf_wait_for_event (rtems_event_set event)
1660{
1661  rtems_status_code sc = RTEMS_SUCCESSFUL;
1662  rtems_event_set   out = 0;
1663
1664  sc = rtems_event_receive (event,
1665                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1666                            RTEMS_NO_TIMEOUT,
1667                            &out);
1668
1669  if (sc != RTEMS_SUCCESSFUL || out != event)
1670    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_EVNT);
1671}
1672
1673static void
1674rtems_bdbuf_wait_for_transient_event (void)
1675{
1676  rtems_status_code sc = RTEMS_SUCCESSFUL;
1677
1678  sc = rtems_event_transient_receive (RTEMS_WAIT, RTEMS_NO_TIMEOUT);
1679  if (sc != RTEMS_SUCCESSFUL)
1680    rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_WAIT_TRANS_EVNT);
1681}
1682
1683static void
1684rtems_bdbuf_wait_for_access (rtems_bdbuf_buffer *bd)
1685{
1686  while (true)
1687  {
1688    switch (bd->state)
1689    {
1690      case RTEMS_BDBUF_STATE_MODIFIED:
1691        rtems_bdbuf_group_release (bd);
1692        /* Fall through */
1693      case RTEMS_BDBUF_STATE_CACHED:
1694        rtems_chain_extract_unprotected (&bd->link);
1695        /* Fall through */
1696      case RTEMS_BDBUF_STATE_EMPTY:
1697        return;
1698      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1699      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1700      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1701      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1702        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1703        break;
1704      case RTEMS_BDBUF_STATE_SYNC:
1705      case RTEMS_BDBUF_STATE_TRANSFER:
1706      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1707        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1708        break;
1709      default:
1710        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_7);
1711    }
1712  }
1713}
1714
1715static void
1716rtems_bdbuf_request_sync_for_modified_buffer (rtems_bdbuf_buffer *bd)
1717{
1718  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
1719  rtems_chain_extract_unprotected (&bd->link);
1720  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
1721  rtems_bdbuf_wake_swapper ();
1722}
1723
1724/**
1725 * @brief Waits until the buffer is ready for recycling.
1726 *
1727 * @retval @c true Buffer is valid and may be recycled.
1728 * @retval @c false Buffer is invalid and has to searched again.
1729 */
1730static bool
1731rtems_bdbuf_wait_for_recycle (rtems_bdbuf_buffer *bd)
1732{
1733  while (true)
1734  {
1735    switch (bd->state)
1736    {
1737      case RTEMS_BDBUF_STATE_FREE:
1738        return true;
1739      case RTEMS_BDBUF_STATE_MODIFIED:
1740        rtems_bdbuf_request_sync_for_modified_buffer (bd);
1741        break;
1742      case RTEMS_BDBUF_STATE_CACHED:
1743      case RTEMS_BDBUF_STATE_EMPTY:
1744        if (bd->waiters == 0)
1745          return true;
1746        else
1747        {
1748          /*
1749           * It is essential that we wait here without a special wait count and
1750           * without the group in use.  Otherwise we could trigger a wait ping
1751           * pong with another recycle waiter.  The state of the buffer is
1752           * arbitrary afterwards.
1753           */
1754          rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1755          return false;
1756        }
1757      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1758      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1759      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1760      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1761        rtems_bdbuf_wait (bd, &bdbuf_cache.access_waiters);
1762        break;
1763      case RTEMS_BDBUF_STATE_SYNC:
1764      case RTEMS_BDBUF_STATE_TRANSFER:
1765      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1766        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1767        break;
1768      default:
1769        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_8);
1770    }
1771  }
1772}
1773
1774static void
1775rtems_bdbuf_wait_for_sync_done (rtems_bdbuf_buffer *bd)
1776{
1777  while (true)
1778  {
1779    switch (bd->state)
1780    {
1781      case RTEMS_BDBUF_STATE_CACHED:
1782      case RTEMS_BDBUF_STATE_EMPTY:
1783      case RTEMS_BDBUF_STATE_MODIFIED:
1784      case RTEMS_BDBUF_STATE_ACCESS_CACHED:
1785      case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
1786      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
1787      case RTEMS_BDBUF_STATE_ACCESS_PURGED:
1788        return;
1789      case RTEMS_BDBUF_STATE_SYNC:
1790      case RTEMS_BDBUF_STATE_TRANSFER:
1791      case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
1792        rtems_bdbuf_wait (bd, &bdbuf_cache.transfer_waiters);
1793        break;
1794      default:
1795        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_9);
1796    }
1797  }
1798}
1799
1800static void
1801rtems_bdbuf_wait_for_buffer (void)
1802{
1803  if (!rtems_chain_is_empty (&bdbuf_cache.modified))
1804    rtems_bdbuf_wake_swapper ();
1805
1806  rtems_bdbuf_anonymous_wait (&bdbuf_cache.buffer_waiters);
1807}
1808
1809static void
1810rtems_bdbuf_sync_after_access (rtems_bdbuf_buffer *bd)
1811{
1812  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_SYNC);
1813
1814  rtems_chain_append_unprotected (&bdbuf_cache.sync, &bd->link);
1815
1816  if (bd->waiters)
1817    rtems_bdbuf_wake (&bdbuf_cache.access_waiters);
1818
1819  rtems_bdbuf_wake_swapper ();
1820  rtems_bdbuf_wait_for_sync_done (bd);
1821
1822  /*
1823   * We may have created a cached or empty buffer which may be recycled.
1824   */
1825  if (bd->waiters == 0
1826        && (bd->state == RTEMS_BDBUF_STATE_CACHED
1827          || bd->state == RTEMS_BDBUF_STATE_EMPTY))
1828  {
1829    if (bd->state == RTEMS_BDBUF_STATE_EMPTY)
1830    {
1831      rtems_bdbuf_remove_from_tree (bd);
1832      rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1833    }
1834    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1835  }
1836}
1837
1838static rtems_bdbuf_buffer *
1839rtems_bdbuf_get_buffer_for_read_ahead (rtems_disk_device *dd,
1840                                       rtems_blkdev_bnum  block)
1841{
1842  rtems_bdbuf_buffer *bd = NULL;
1843
1844  bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1845
1846  if (bd == NULL)
1847  {
1848    bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);
1849
1850    if (bd != NULL)
1851      rtems_bdbuf_group_obtain (bd);
1852  }
1853  else
1854    /*
1855     * The buffer is in the cache.  So it is already available or in use, and
1856     * thus no need for a read ahead.
1857     */
1858    bd = NULL;
1859
1860  return bd;
1861}
1862
1863static rtems_bdbuf_buffer *
1864rtems_bdbuf_get_buffer_for_access (rtems_disk_device *dd,
1865                                   rtems_blkdev_bnum  block)
1866{
1867  rtems_bdbuf_buffer *bd = NULL;
1868
1869  do
1870  {
1871    bd = rtems_bdbuf_avl_search (&bdbuf_cache.tree, dd, block);
1872
1873    if (bd != NULL)
1874    {
1875      if (bd->group->bds_per_group != dd->bds_per_group)
1876      {
1877        if (rtems_bdbuf_wait_for_recycle (bd))
1878        {
1879          rtems_bdbuf_remove_from_tree_and_lru_list (bd);
1880          rtems_bdbuf_make_free_and_add_to_lru_list (bd);
1881          rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
1882        }
1883        bd = NULL;
1884      }
1885    }
1886    else
1887    {
1888      bd = rtems_bdbuf_get_buffer_from_lru_list (dd, block);
1889
1890      if (bd == NULL)
1891        rtems_bdbuf_wait_for_buffer ();
1892    }
1893  }
1894  while (bd == NULL);
1895
1896  rtems_bdbuf_wait_for_access (bd);
1897  rtems_bdbuf_group_obtain (bd);
1898
1899  return bd;
1900}
1901
1902static rtems_status_code
1903rtems_bdbuf_get_media_block (const rtems_disk_device *dd,
1904                             rtems_blkdev_bnum        block,
1905                             rtems_blkdev_bnum       *media_block_ptr)
1906{
1907  rtems_status_code sc = RTEMS_SUCCESSFUL;
1908
1909  if (block < dd->block_count)
1910  {
1911    /*
1912     * Compute the media block number. Drivers work with media block number not
1913     * the block number a BD may have as this depends on the block size set by
1914     * the user.
1915     */
1916    *media_block_ptr = rtems_bdbuf_media_block (dd, block) + dd->start;
1917  }
1918  else
1919  {
1920    sc = RTEMS_INVALID_ID;
1921  }
1922
1923  return sc;
1924}
1925
1926rtems_status_code
1927rtems_bdbuf_get (rtems_disk_device   *dd,
1928                 rtems_blkdev_bnum    block,
1929                 rtems_bdbuf_buffer **bd_ptr)
1930{
1931  rtems_status_code   sc = RTEMS_SUCCESSFUL;
1932  rtems_bdbuf_buffer *bd = NULL;
1933  rtems_blkdev_bnum   media_block;
1934
1935  rtems_bdbuf_lock_cache ();
1936
1937  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
1938  if (sc == RTEMS_SUCCESSFUL)
1939  {
1940    /*
1941     * Print the block index relative to the physical disk.
1942     */
1943    if (rtems_bdbuf_tracer)
1944      printf ("bdbuf:get: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
1945              media_block, block, (unsigned) dd->dev);
1946
1947    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);
1948
1949    switch (bd->state)
1950    {
1951      case RTEMS_BDBUF_STATE_CACHED:
1952        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
1953        break;
1954      case RTEMS_BDBUF_STATE_EMPTY:
1955        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_EMPTY);
1956        break;
1957      case RTEMS_BDBUF_STATE_MODIFIED:
1958        /*
1959         * To get a modified buffer could be considered a bug in the caller
1960         * because you should not be getting an already modified buffer but
1961         * user may have modified a byte in a block then decided to seek the
1962         * start and write the whole block and the file system will have no
1963         * record of this so just gets the block to fill.
1964         */
1965        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
1966        break;
1967      default:
1968        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_2);
1969        break;
1970    }
1971
1972    if (rtems_bdbuf_tracer)
1973    {
1974      rtems_bdbuf_show_users ("get", bd);
1975      rtems_bdbuf_show_usage ();
1976    }
1977  }
1978
1979  rtems_bdbuf_unlock_cache ();
1980
1981  *bd_ptr = bd;
1982
1983  return sc;
1984}
1985
1986/**
1987 * Call back handler called by the low level driver when the transfer has
1988 * completed. This function may be invoked from interrupt handler.
1989 *
1990 * @param arg Arbitrary argument specified in block device request
1991 *            structure (in this case - pointer to the appropriate
1992 *            block device request structure).
1993 * @param status I/O completion status
1994 */
1995static void
1996rtems_bdbuf_transfer_done (rtems_blkdev_request* req, rtems_status_code status)
1997{
1998  req->status = status;
1999
2000  rtems_event_transient_send (req->io_task);
2001}
2002
2003static rtems_status_code
2004rtems_bdbuf_execute_transfer_request (rtems_disk_device    *dd,
2005                                      rtems_blkdev_request *req,
2006                                      bool                  cache_locked)
2007{
2008  rtems_status_code sc = RTEMS_SUCCESSFUL;
2009  uint32_t transfer_index = 0;
2010  bool wake_transfer_waiters = false;
2011  bool wake_buffer_waiters = false;
2012
2013  if (cache_locked)
2014    rtems_bdbuf_unlock_cache ();
2015
2016  /* The return value will be ignored for transfer requests */
2017  dd->ioctl (dd->phys_dev, RTEMS_BLKIO_REQUEST, req);
2018
2019  /* Wait for transfer request completion */
2020  rtems_bdbuf_wait_for_transient_event ();
2021  sc = req->status;
2022
2023  rtems_bdbuf_lock_cache ();
2024
2025  /* Statistics */
2026  if (req->req == RTEMS_BLKDEV_REQ_READ)
2027  {
2028    dd->stats.read_blocks += req->bufnum;
2029    if (sc != RTEMS_SUCCESSFUL)
2030      ++dd->stats.read_errors;
2031  }
2032  else
2033  {
2034    dd->stats.write_blocks += req->bufnum;
2035    ++dd->stats.write_transfers;
2036    if (sc != RTEMS_SUCCESSFUL)
2037      ++dd->stats.write_errors;
2038  }
2039
2040  for (transfer_index = 0; transfer_index < req->bufnum; ++transfer_index)
2041  {
2042    rtems_bdbuf_buffer *bd = req->bufs [transfer_index].user;
2043    bool waiters = bd->waiters;
2044
2045    if (waiters)
2046      wake_transfer_waiters = true;
2047    else
2048      wake_buffer_waiters = true;
2049
2050    rtems_bdbuf_group_release (bd);
2051
2052    if (sc == RTEMS_SUCCESSFUL && bd->state == RTEMS_BDBUF_STATE_TRANSFER)
2053      rtems_bdbuf_make_cached_and_add_to_lru_list (bd);
2054    else
2055      rtems_bdbuf_discard_buffer (bd);
2056
2057    if (rtems_bdbuf_tracer)
2058      rtems_bdbuf_show_users ("transfer", bd);
2059  }
2060
2061  if (wake_transfer_waiters)
2062    rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
2063
2064  if (wake_buffer_waiters)
2065    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2066
2067  if (!cache_locked)
2068    rtems_bdbuf_unlock_cache ();
2069
2070  if (sc == RTEMS_SUCCESSFUL || sc == RTEMS_UNSATISFIED)
2071    return sc;
2072  else
2073    return RTEMS_IO_ERROR;
2074}
2075
2076static rtems_status_code
2077rtems_bdbuf_execute_read_request (rtems_disk_device  *dd,
2078                                  rtems_bdbuf_buffer *bd,
2079                                  uint32_t            transfer_count)
2080{
2081  rtems_blkdev_request *req = NULL;
2082  rtems_blkdev_bnum media_block = bd->block;
2083  uint32_t media_blocks_per_block = dd->media_blocks_per_block;
2084  uint32_t block_size = dd->block_size;
2085  uint32_t transfer_index = 1;
2086
2087  /*
2088   * TODO: This type of request structure is wrong and should be removed.
2089   */
2090#define bdbuf_alloc(size) __builtin_alloca (size)
2091
2092  req = bdbuf_alloc (rtems_bdbuf_read_request_size (transfer_count));
2093
2094  req->req = RTEMS_BLKDEV_REQ_READ;
2095  req->done = rtems_bdbuf_transfer_done;
2096  req->io_task = rtems_task_self ();
2097  req->bufnum = 0;
2098
2099  rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2100
2101  req->bufs [0].user   = bd;
2102  req->bufs [0].block  = media_block;
2103  req->bufs [0].length = block_size;
2104  req->bufs [0].buffer = bd->buffer;
2105
2106  if (rtems_bdbuf_tracer)
2107    rtems_bdbuf_show_users ("read", bd);
2108
2109  while (transfer_index < transfer_count)
2110  {
2111    media_block += media_blocks_per_block;
2112
2113    bd = rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);
2114
2115    if (bd == NULL)
2116      break;
2117
2118    rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2119
2120    req->bufs [transfer_index].user   = bd;
2121    req->bufs [transfer_index].block  = media_block;
2122    req->bufs [transfer_index].length = block_size;
2123    req->bufs [transfer_index].buffer = bd->buffer;
2124
2125    if (rtems_bdbuf_tracer)
2126      rtems_bdbuf_show_users ("read", bd);
2127
2128    ++transfer_index;
2129  }
2130
2131  req->bufnum = transfer_index;
2132
2133  return rtems_bdbuf_execute_transfer_request (dd, req, true);
2134}
2135
2136static bool
2137rtems_bdbuf_is_read_ahead_active (const rtems_disk_device *dd)
2138{
2139  return !rtems_chain_is_node_off_chain (&dd->read_ahead.node);
2140}
2141
2142static void
2143rtems_bdbuf_read_ahead_cancel (rtems_disk_device *dd)
2144{
2145  if (rtems_bdbuf_is_read_ahead_active (dd))
2146  {
2147    rtems_chain_extract_unprotected (&dd->read_ahead.node);
2148    rtems_chain_set_off_chain (&dd->read_ahead.node);
2149  }
2150}
2151
2152static void
2153rtems_bdbuf_read_ahead_reset (rtems_disk_device *dd)
2154{
2155  rtems_bdbuf_read_ahead_cancel (dd);
2156  dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
2157}
2158
2159static void
2160rtems_bdbuf_check_read_ahead_trigger (rtems_disk_device *dd,
2161                                      rtems_blkdev_bnum  block)
2162{
2163  if (bdbuf_cache.read_ahead_task != 0
2164      && dd->read_ahead.trigger == block
2165      && !rtems_bdbuf_is_read_ahead_active (dd))
2166  {
2167    rtems_status_code sc;
2168    rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;
2169
2170    if (rtems_chain_is_empty (chain))
2171    {
2172      sc = rtems_event_send (bdbuf_cache.read_ahead_task,
2173                             RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
2174      if (sc != RTEMS_SUCCESSFUL)
2175        rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_RA_WAKE_UP);
2176    }
2177
2178    rtems_chain_append_unprotected (chain, &dd->read_ahead.node);
2179  }
2180}
2181
2182static void
2183rtems_bdbuf_set_read_ahead_trigger (rtems_disk_device *dd,
2184                                    rtems_blkdev_bnum  block)
2185{
2186  if (dd->read_ahead.trigger != block)
2187  {
2188    rtems_bdbuf_read_ahead_cancel (dd);
2189    dd->read_ahead.trigger = block + 1;
2190    dd->read_ahead.next = block + 2;
2191  }
2192}
2193
2194rtems_status_code
2195rtems_bdbuf_read (rtems_disk_device   *dd,
2196                  rtems_blkdev_bnum    block,
2197                  rtems_bdbuf_buffer **bd_ptr)
2198{
2199  rtems_status_code     sc = RTEMS_SUCCESSFUL;
2200  rtems_bdbuf_buffer   *bd = NULL;
2201  rtems_blkdev_bnum     media_block;
2202
2203  rtems_bdbuf_lock_cache ();
2204
2205  sc = rtems_bdbuf_get_media_block (dd, block, &media_block);
2206  if (sc == RTEMS_SUCCESSFUL)
2207  {
2208    if (rtems_bdbuf_tracer)
2209      printf ("bdbuf:read: %" PRIu32 " (%" PRIu32 ") (dev = %08x)\n",
2210              media_block, block, (unsigned) dd->dev);
2211
2212    bd = rtems_bdbuf_get_buffer_for_access (dd, media_block);
2213    switch (bd->state)
2214    {
2215      case RTEMS_BDBUF_STATE_CACHED:
2216        ++dd->stats.read_hits;
2217        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
2218        break;
2219      case RTEMS_BDBUF_STATE_MODIFIED:
2220        ++dd->stats.read_hits;
2221        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_MODIFIED);
2222        break;
2223      case RTEMS_BDBUF_STATE_EMPTY:
2224        ++dd->stats.read_misses;
2225        rtems_bdbuf_set_read_ahead_trigger (dd, block);
2226        sc = rtems_bdbuf_execute_read_request (dd, bd, 1);
2227        if (sc == RTEMS_SUCCESSFUL)
2228        {
2229          rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_ACCESS_CACHED);
2230          rtems_chain_extract_unprotected (&bd->link);
2231          rtems_bdbuf_group_obtain (bd);
2232        }
2233        else
2234        {
2235          bd = NULL;
2236        }
2237        break;
2238      default:
2239        rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_4);
2240        break;
2241    }
2242
2243    rtems_bdbuf_check_read_ahead_trigger (dd, block);
2244  }
2245
2246  rtems_bdbuf_unlock_cache ();
2247
2248  *bd_ptr = bd;
2249
2250  return sc;
2251}
2252
2253static rtems_status_code
2254rtems_bdbuf_check_bd_and_lock_cache (rtems_bdbuf_buffer *bd, const char *kind)
2255{
2256  if (bd == NULL)
2257    return RTEMS_INVALID_ADDRESS;
2258  if (rtems_bdbuf_tracer)
2259  {
2260    printf ("bdbuf:%s: %" PRIu32 "\n", kind, bd->block);
2261    rtems_bdbuf_show_users (kind, bd);
2262  }
2263  rtems_bdbuf_lock_cache();
2264
2265  return RTEMS_SUCCESSFUL;
2266}
2267
2268rtems_status_code
2269rtems_bdbuf_release (rtems_bdbuf_buffer *bd)
2270{
2271  rtems_status_code sc = RTEMS_SUCCESSFUL;
2272
2273  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release");
2274  if (sc != RTEMS_SUCCESSFUL)
2275    return sc;
2276
2277  switch (bd->state)
2278  {
2279    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2280      rtems_bdbuf_add_to_lru_list_after_access (bd);
2281      break;
2282    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2283    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2284      rtems_bdbuf_discard_buffer_after_access (bd);
2285      break;
2286    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2287      rtems_bdbuf_add_to_modified_list_after_access (bd);
2288      break;
2289    default:
2290      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_0);
2291      break;
2292  }
2293
2294  if (rtems_bdbuf_tracer)
2295    rtems_bdbuf_show_usage ();
2296
2297  rtems_bdbuf_unlock_cache ();
2298
2299  return RTEMS_SUCCESSFUL;
2300}
2301
2302rtems_status_code
2303rtems_bdbuf_release_modified (rtems_bdbuf_buffer *bd)
2304{
2305  rtems_status_code sc = RTEMS_SUCCESSFUL;
2306
2307  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "release modified");
2308  if (sc != RTEMS_SUCCESSFUL)
2309    return sc;
2310
2311  switch (bd->state)
2312  {
2313    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2314    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2315    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2316      rtems_bdbuf_add_to_modified_list_after_access (bd);
2317      break;
2318    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2319      rtems_bdbuf_discard_buffer_after_access (bd);
2320      break;
2321    default:
2322      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_6);
2323      break;
2324  }
2325
2326  if (rtems_bdbuf_tracer)
2327    rtems_bdbuf_show_usage ();
2328
2329  rtems_bdbuf_unlock_cache ();
2330
2331  return RTEMS_SUCCESSFUL;
2332}
2333
2334rtems_status_code
2335rtems_bdbuf_sync (rtems_bdbuf_buffer *bd)
2336{
2337  rtems_status_code sc = RTEMS_SUCCESSFUL;
2338
2339  sc = rtems_bdbuf_check_bd_and_lock_cache (bd, "sync");
2340  if (sc != RTEMS_SUCCESSFUL)
2341    return sc;
2342
2343  switch (bd->state)
2344  {
2345    case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2346    case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2347    case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2348      rtems_bdbuf_sync_after_access (bd);
2349      break;
2350    case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2351      rtems_bdbuf_discard_buffer_after_access (bd);
2352      break;
2353    default:
2354      rtems_bdbuf_fatal_with_state (bd->state, RTEMS_BDBUF_FATAL_STATE_5);
2355      break;
2356  }
2357
2358  if (rtems_bdbuf_tracer)
2359    rtems_bdbuf_show_usage ();
2360
2361  rtems_bdbuf_unlock_cache ();
2362
2363  return RTEMS_SUCCESSFUL;
2364}
2365
2366rtems_status_code
2367rtems_bdbuf_syncdev (rtems_disk_device *dd)
2368{
2369  if (rtems_bdbuf_tracer)
2370    printf ("bdbuf:syncdev: %08x\n", (unsigned) dd->dev);
2371
2372  /*
2373   * Take the sync lock before locking the cache. Once we have the sync lock we
2374   * can lock the cache. If another thread has the sync lock it will cause this
2375   * thread to block until it owns the sync lock then it can own the cache. The
2376   * sync lock can only be obtained with the cache unlocked.
2377   */
2378  rtems_bdbuf_lock_sync ();
2379  rtems_bdbuf_lock_cache ();
2380
2381  /*
2382   * Set the cache to have a sync active for a specific device and let the swap
2383   * out task know the id of the requester to wake when done.
2384   *
2385   * The swap out task will negate the sync active flag when no more buffers
2386   * for the device are held on the "modified for sync" queues.
2387   */
2388  bdbuf_cache.sync_active    = true;
2389  bdbuf_cache.sync_requester = rtems_task_self ();
2390  bdbuf_cache.sync_device    = dd;
2391
2392  rtems_bdbuf_wake_swapper ();
2393  rtems_bdbuf_unlock_cache ();
2394  rtems_bdbuf_wait_for_transient_event ();
2395  rtems_bdbuf_unlock_sync ();
2396
2397  return RTEMS_SUCCESSFUL;
2398}
2399
2400/**
2401 * Swapout transfer to the driver. The driver will break this I/O into groups
2402 * of consecutive write requests is multiple consecutive buffers are required
2403 * by the driver. The cache is not locked.
2404 *
2405 * @param transfer The transfer transaction.
2406 */
2407static void
2408rtems_bdbuf_swapout_write (rtems_bdbuf_swapout_transfer* transfer)
2409{
2410  rtems_chain_node *node;
2411
2412  if (rtems_bdbuf_tracer)
2413    printf ("bdbuf:swapout transfer: %08x\n", (unsigned) transfer->dd->dev);
2414
2415  /*
2416   * If there are buffers to transfer to the media transfer them.
2417   */
2418  if (!rtems_chain_is_empty (&transfer->bds))
2419  {
2420    /*
2421     * The last block number used when the driver only supports
2422     * continuous blocks in a single request.
2423     */
2424    uint32_t last_block = 0;
2425
2426    rtems_disk_device *dd = transfer->dd;
2427    uint32_t media_blocks_per_block = dd->media_blocks_per_block;
2428    bool need_continuous_blocks =
2429      (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) != 0;
2430
2431    /*
2432     * Take as many buffers as configured and pass to the driver. Note, the
2433     * API to the drivers has an array of buffers and if a chain was passed
2434     * we could have just passed the list. If the driver API is updated it
2435     * should be possible to make this change with little effect in this
2436     * code. The array that is passed is broken in design and should be
2437     * removed. Merging members of a struct into the first member is
2438     * trouble waiting to happen.
2439     */
2440    transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
2441    transfer->write_req.bufnum = 0;
2442
2443    while ((node = rtems_chain_get_unprotected(&transfer->bds)) != NULL)
2444    {
2445      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2446      bool                write = false;
2447
2448      /*
2449       * If the device only accepts sequential buffers and this is not the
2450       * first buffer (the first is always sequential, and the buffer is not
2451       * sequential then put the buffer back on the transfer chain and write
2452       * the committed buffers.
2453       */
2454
2455      if (rtems_bdbuf_tracer)
2456        printf ("bdbuf:swapout write: bd:%" PRIu32 ", bufnum:%" PRIu32 " mode:%s\n",
2457                bd->block, transfer->write_req.bufnum,
2458                need_continuous_blocks ? "MULTI" : "SCAT");
2459
2460      if (need_continuous_blocks && transfer->write_req.bufnum &&
2461          bd->block != last_block + media_blocks_per_block)
2462      {
2463        rtems_chain_prepend_unprotected (&transfer->bds, &bd->link);
2464        write = true;
2465      }
2466      else
2467      {
2468        rtems_blkdev_sg_buffer* buf;
2469        buf = &transfer->write_req.bufs[transfer->write_req.bufnum];
2470        transfer->write_req.bufnum++;
2471        buf->user   = bd;
2472        buf->block  = bd->block;
2473        buf->length = dd->block_size;
2474        buf->buffer = bd->buffer;
2475        last_block  = bd->block;
2476      }
2477
2478      /*
2479       * Perform the transfer if there are no more buffers, or the transfer
2480       * size has reached the configured max. value.
2481       */
2482
2483      if (rtems_chain_is_empty (&transfer->bds) ||
2484          (transfer->write_req.bufnum >= bdbuf_config.max_write_blocks))
2485        write = true;
2486
2487      if (write)
2488      {
2489        rtems_bdbuf_execute_transfer_request (dd, &transfer->write_req, false);
2490
2491        transfer->write_req.status = RTEMS_RESOURCE_IN_USE;
2492        transfer->write_req.bufnum = 0;
2493      }
2494    }
2495
2496    /*
2497     * If sync'ing and the deivce is capability of handling a sync IO control
2498     * call perform the call.
2499     */
2500    if (transfer->syncing &&
2501        (dd->phys_dev->capabilities & RTEMS_BLKDEV_CAP_SYNC))
2502    {
2503      /* int result = */ dd->ioctl (dd->phys_dev, RTEMS_BLKDEV_REQ_SYNC, NULL);
2504      /* How should the error be handled ? */
2505    }
2506  }
2507}
2508
2509/**
2510 * Process the modified list of buffers. There is a sync or modified list that
2511 * needs to be handled so we have a common function to do the work.
2512 *
2513 * @param dd_ptr Pointer to the device to handle. If BDBUF_INVALID_DEV no
2514 * device is selected so select the device of the first buffer to be written to
2515 * disk.
2516 * @param chain The modified chain to process.
2517 * @param transfer The chain to append buffers to be written too.
2518 * @param sync_active If true this is a sync operation so expire all timers.
2519 * @param update_timers If true update the timers.
2520 * @param timer_delta It update_timers is true update the timers by this
2521 *                    amount.
2522 */
2523static void
2524rtems_bdbuf_swapout_modified_processing (rtems_disk_device  **dd_ptr,
2525                                         rtems_chain_control* chain,
2526                                         rtems_chain_control* transfer,
2527                                         bool                 sync_active,
2528                                         bool                 update_timers,
2529                                         uint32_t             timer_delta)
2530{
2531  if (!rtems_chain_is_empty (chain))
2532  {
2533    rtems_chain_node* node = rtems_chain_head (chain);
2534    bool              sync_all;
2535
2536    node = node->next;
2537
2538    /*
2539     * A sync active with no valid dev means sync all.
2540     */
2541    if (sync_active && (*dd_ptr == BDBUF_INVALID_DEV))
2542      sync_all = true;
2543    else
2544      sync_all = false;
2545
2546    while (!rtems_chain_is_tail (chain, node))
2547    {
2548      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
2549
2550      /*
2551       * Check if the buffer's hold timer has reached 0. If a sync is active
2552       * or someone waits for a buffer written force all the timers to 0.
2553       *
2554       * @note Lots of sync requests will skew this timer. It should be based
2555       *       on TOD to be accurate. Does it matter ?
2556       */
2557      if (sync_all || (sync_active && (*dd_ptr == bd->dd))
2558          || rtems_bdbuf_has_buffer_waiters ())
2559        bd->hold_timer = 0;
2560
2561      if (bd->hold_timer)
2562      {
2563        if (update_timers)
2564        {
2565          if (bd->hold_timer > timer_delta)
2566            bd->hold_timer -= timer_delta;
2567          else
2568            bd->hold_timer = 0;
2569        }
2570
2571        if (bd->hold_timer)
2572        {
2573          node = node->next;
2574          continue;
2575        }
2576      }
2577
2578      /*
2579       * This assumes we can set it to BDBUF_INVALID_DEV which is just an
2580       * assumption. Cannot use the transfer list being empty the sync dev
2581       * calls sets the dev to use.
2582       */
2583      if (*dd_ptr == BDBUF_INVALID_DEV)
2584        *dd_ptr = bd->dd;
2585
2586      if (bd->dd == *dd_ptr)
2587      {
2588        rtems_chain_node* next_node = node->next;
2589        rtems_chain_node* tnode = rtems_chain_tail (transfer);
2590
2591        /*
2592         * The blocks on the transfer list are sorted in block order. This
2593         * means multi-block transfers for drivers that require consecutive
2594         * blocks perform better with sorted blocks and for real disks it may
2595         * help lower head movement.
2596         */
2597
2598        rtems_bdbuf_set_state (bd, RTEMS_BDBUF_STATE_TRANSFER);
2599
2600        rtems_chain_extract_unprotected (node);
2601
2602        tnode = tnode->previous;
2603
2604        while (node && !rtems_chain_is_head (transfer, tnode))
2605        {
2606          rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
2607
2608          if (bd->block > tbd->block)
2609          {
2610            rtems_chain_insert_unprotected (tnode, node);
2611            node = NULL;
2612          }
2613          else
2614            tnode = tnode->previous;
2615        }
2616
2617        if (node)
2618          rtems_chain_prepend_unprotected (transfer, node);
2619
2620        node = next_node;
2621      }
2622      else
2623      {
2624        node = node->next;
2625      }
2626    }
2627  }
2628}
2629
2630/**
2631 * Process the cache's modified buffers. Check the sync list first then the
2632 * modified list extracting the buffers suitable to be written to disk. We have
2633 * a device at a time. The task level loop will repeat this operation while
2634 * there are buffers to be written. If the transfer fails place the buffers
2635 * back on the modified list and try again later. The cache is unlocked while
2636 * the buffers are being written to disk.
2637 *
2638 * @param timer_delta It update_timers is true update the timers by this
2639 *                    amount.
2640 * @param update_timers If true update the timers.
2641 * @param transfer The transfer transaction data.
2642 *
2643 * @retval true Buffers where written to disk so scan again.
2644 * @retval false No buffers where written to disk.
2645 */
2646static bool
2647rtems_bdbuf_swapout_processing (unsigned long                 timer_delta,
2648                                bool                          update_timers,
2649                                rtems_bdbuf_swapout_transfer* transfer)
2650{
2651  rtems_bdbuf_swapout_worker* worker;
2652  bool                        transfered_buffers = false;
2653
2654  rtems_bdbuf_lock_cache ();
2655
2656  /*
2657   * If a sync is active do not use a worker because the current code does not
2658   * cleaning up after. We need to know the buffers have been written when
2659   * syncing to release sync lock and currently worker threads do not return to
2660   * here. We do not know the worker is the last in a sequence of sync writes
2661   * until after we have it running so we do not know to tell it to release the
2662   * lock. The simplest solution is to get the main swap out task perform all
2663   * sync operations.
2664   */
2665  if (bdbuf_cache.sync_active)
2666    worker = NULL;
2667  else
2668  {
2669    worker = (rtems_bdbuf_swapout_worker*)
2670      rtems_chain_get_unprotected (&bdbuf_cache.swapout_free_workers);
2671    if (worker)
2672      transfer = &worker->transfer;
2673  }
2674
2675  rtems_chain_initialize_empty (&transfer->bds);
2676  transfer->dd = BDBUF_INVALID_DEV;
2677  transfer->syncing = bdbuf_cache.sync_active;
2678
2679  /*
2680   * When the sync is for a device limit the sync to that device. If the sync
2681   * is for a buffer handle process the devices in the order on the sync
2682   * list. This means the dev is BDBUF_INVALID_DEV.
2683   */
2684  if (bdbuf_cache.sync_active)
2685    transfer->dd = bdbuf_cache.sync_device;
2686
2687  /*
2688   * If we have any buffers in the sync queue move them to the modified
2689   * list. The first sync buffer will select the device we use.
2690   */
2691  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2692                                           &bdbuf_cache.sync,
2693                                           &transfer->bds,
2694                                           true, false,
2695                                           timer_delta);
2696
2697  /*
2698   * Process the cache's modified list.
2699   */
2700  rtems_bdbuf_swapout_modified_processing (&transfer->dd,
2701                                           &bdbuf_cache.modified,
2702                                           &transfer->bds,
2703                                           bdbuf_cache.sync_active,
2704                                           update_timers,
2705                                           timer_delta);
2706
2707  /*
2708   * We have all the buffers that have been modified for this device so the
2709   * cache can be unlocked because the state of each buffer has been set to
2710   * TRANSFER.
2711   */
2712  rtems_bdbuf_unlock_cache ();
2713
2714  /*
2715   * If there are buffers to transfer to the media transfer them.
2716   */
2717  if (!rtems_chain_is_empty (&transfer->bds))
2718  {
2719    if (worker)
2720    {
2721      rtems_status_code sc = rtems_event_send (worker->id,
2722                                               RTEMS_BDBUF_SWAPOUT_SYNC);
2723      if (sc != RTEMS_SUCCESSFUL)
2724        rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SO_WAKE_2);
2725    }
2726    else
2727    {
2728      rtems_bdbuf_swapout_write (transfer);
2729    }
2730
2731    transfered_buffers = true;
2732  }
2733
2734  if (bdbuf_cache.sync_active && !transfered_buffers)
2735  {
2736    rtems_id sync_requester;
2737    rtems_bdbuf_lock_cache ();
2738    sync_requester = bdbuf_cache.sync_requester;
2739    bdbuf_cache.sync_active = false;
2740    bdbuf_cache.sync_requester = 0;
2741    rtems_bdbuf_unlock_cache ();
2742    if (sync_requester)
2743      rtems_event_transient_send (sync_requester);
2744  }
2745
2746  return transfered_buffers;
2747}
2748
2749/**
2750 * The swapout worker thread body.
2751 *
2752 * @param arg A pointer to the worker thread's private data.
2753 * @return rtems_task Not used.
2754 */
2755static rtems_task
2756rtems_bdbuf_swapout_worker_task (rtems_task_argument arg)
2757{
2758  rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) arg;
2759
2760  while (worker->enabled)
2761  {
2762    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_SWAPOUT_SYNC);
2763
2764    rtems_bdbuf_swapout_write (&worker->transfer);
2765
2766    rtems_bdbuf_lock_cache ();
2767
2768    rtems_chain_initialize_empty (&worker->transfer.bds);
2769    worker->transfer.dd = BDBUF_INVALID_DEV;
2770
2771    rtems_chain_append_unprotected (&bdbuf_cache.swapout_free_workers, &worker->link);
2772
2773    rtems_bdbuf_unlock_cache ();
2774  }
2775
2776  free (worker);
2777
2778  rtems_task_delete (RTEMS_SELF);
2779}
2780
2781/**
2782 * Close the swapout worker threads.
2783 */
2784static void
2785rtems_bdbuf_swapout_workers_close (void)
2786{
2787  rtems_chain_node* node;
2788
2789  rtems_bdbuf_lock_cache ();
2790
2791  node = rtems_chain_first (&bdbuf_cache.swapout_free_workers);
2792  while (!rtems_chain_is_tail (&bdbuf_cache.swapout_free_workers, node))
2793  {
2794    rtems_bdbuf_swapout_worker* worker = (rtems_bdbuf_swapout_worker*) node;
2795    worker->enabled = false;
2796    rtems_event_send (worker->id, RTEMS_BDBUF_SWAPOUT_SYNC);
2797    node = rtems_chain_next (node);
2798  }
2799
2800  rtems_bdbuf_unlock_cache ();
2801}
2802
2803/**
2804 * Body of task which takes care on flushing modified buffers to the disk.
2805 *
2806 * @param arg A pointer to the global cache data. Use the global variable and
2807 *            not this.
2808 * @return rtems_task Not used.
2809 */
2810static rtems_task
2811rtems_bdbuf_swapout_task (rtems_task_argument arg)
2812{
2813  rtems_bdbuf_swapout_transfer* transfer = (rtems_bdbuf_swapout_transfer *) arg;
2814  uint32_t                      period_in_ticks;
2815  const uint32_t                period_in_msecs = bdbuf_config.swapout_period;
2816  uint32_t                      timer_delta;
2817
2818  /*
2819   * Localise the period.
2820   */
2821  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
2822
2823  /*
2824   * This is temporary. Needs to be changed to use the real time clock.
2825   */
2826  timer_delta = period_in_msecs;
2827
2828  while (bdbuf_cache.swapout_enabled)
2829  {
2830    rtems_event_set   out;
2831    rtems_status_code sc;
2832
2833    /*
2834     * Only update the timers once in the processing cycle.
2835     */
2836    bool update_timers = true;
2837
2838    /*
2839     * If we write buffers to any disk perform a check again. We only write a
2840     * single device at a time and the cache may have more than one device's
2841     * buffers modified waiting to be written.
2842     */
2843    bool transfered_buffers;
2844
2845    do
2846    {
2847      transfered_buffers = false;
2848
2849      /*
2850       * Extact all the buffers we find for a specific device. The device is
2851       * the first one we find on a modified list. Process the sync queue of
2852       * buffers first.
2853       */
2854      if (rtems_bdbuf_swapout_processing (timer_delta,
2855                                          update_timers,
2856                                          transfer))
2857      {
2858        transfered_buffers = true;
2859      }
2860
2861      /*
2862       * Only update the timers once.
2863       */
2864      update_timers = false;
2865    }
2866    while (transfered_buffers);
2867
2868    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2869                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2870                              period_in_ticks,
2871                              &out);
2872
2873    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2874      rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_SWAPOUT_RE);
2875  }
2876
2877  rtems_bdbuf_swapout_workers_close ();
2878
2879  free (transfer);
2880
2881  rtems_task_delete (RTEMS_SELF);
2882}
2883
2884static void
2885rtems_bdbuf_purge_list (rtems_chain_control *purge_list)
2886{
2887  bool wake_buffer_waiters = false;
2888  rtems_chain_node *node = NULL;
2889
2890  while ((node = rtems_chain_get_unprotected (purge_list)) != NULL)
2891  {
2892    rtems_bdbuf_buffer *bd = (rtems_bdbuf_buffer *) node;
2893
2894    if (bd->waiters == 0)
2895      wake_buffer_waiters = true;
2896
2897    rtems_bdbuf_discard_buffer (bd);
2898  }
2899
2900  if (wake_buffer_waiters)
2901    rtems_bdbuf_wake (&bdbuf_cache.buffer_waiters);
2902}
2903
2904static void
2905rtems_bdbuf_gather_for_purge (rtems_chain_control *purge_list,
2906                              const rtems_disk_device *dd)
2907{
2908  rtems_bdbuf_buffer *stack [RTEMS_BDBUF_AVL_MAX_HEIGHT];
2909  rtems_bdbuf_buffer **prev = stack;
2910  rtems_bdbuf_buffer *cur = bdbuf_cache.tree;
2911
2912  *prev = NULL;
2913
2914  while (cur != NULL)
2915  {
2916    if (cur->dd == dd)
2917    {
2918      switch (cur->state)
2919      {
2920        case RTEMS_BDBUF_STATE_FREE:
2921        case RTEMS_BDBUF_STATE_EMPTY:
2922        case RTEMS_BDBUF_STATE_ACCESS_PURGED:
2923        case RTEMS_BDBUF_STATE_TRANSFER_PURGED:
2924          break;
2925        case RTEMS_BDBUF_STATE_SYNC:
2926          rtems_bdbuf_wake (&bdbuf_cache.transfer_waiters);
2927          /* Fall through */
2928        case RTEMS_BDBUF_STATE_MODIFIED:
2929          rtems_bdbuf_group_release (cur);
2930          /* Fall through */
2931        case RTEMS_BDBUF_STATE_CACHED:
2932          rtems_chain_extract_unprotected (&cur->link);
2933          rtems_chain_append_unprotected (purge_list, &cur->link);
2934          break;
2935        case RTEMS_BDBUF_STATE_TRANSFER:
2936          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_TRANSFER_PURGED);
2937          break;
2938        case RTEMS_BDBUF_STATE_ACCESS_CACHED:
2939        case RTEMS_BDBUF_STATE_ACCESS_EMPTY:
2940        case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
2941          rtems_bdbuf_set_state (cur, RTEMS_BDBUF_STATE_ACCESS_PURGED);
2942          break;
2943        default:
2944          rtems_bdbuf_fatal (RTEMS_BDBUF_FATAL_STATE_11);
2945      }
2946    }
2947
2948    if (cur->avl.left != NULL)
2949    {
2950      /* Left */
2951      ++prev;
2952      *prev = cur;
2953      cur = cur->avl.left;
2954    }
2955    else if (cur->avl.right != NULL)
2956    {
2957      /* Right */
2958      ++prev;
2959      *prev = cur;
2960      cur = cur->avl.right;
2961    }
2962    else
2963    {
2964      while (*prev != NULL
2965             && (cur == (*prev)->avl.right || (*prev)->avl.right == NULL))
2966      {
2967        /* Up */
2968        cur = *prev;
2969        --prev;
2970      }
2971      if (*prev != NULL)
2972        /* Right */
2973        cur = (*prev)->avl.right;
2974      else
2975        /* Finished */
2976        cur = NULL;
2977    }
2978  }
2979}
2980
2981void
2982rtems_bdbuf_purge_dev (rtems_disk_device *dd)
2983{
2984  rtems_chain_control purge_list;
2985
2986  rtems_chain_initialize_empty (&purge_list);
2987  rtems_bdbuf_lock_cache ();
2988  rtems_bdbuf_read_ahead_reset (dd);
2989  rtems_bdbuf_gather_for_purge (&purge_list, dd);
2990  rtems_bdbuf_purge_list (&purge_list);
2991  rtems_bdbuf_unlock_cache ();
2992}
2993
2994rtems_status_code
2995rtems_bdbuf_set_block_size (rtems_disk_device *dd,
2996                            uint32_t           block_size,
2997                            bool               sync)
2998{
2999  rtems_status_code sc = RTEMS_SUCCESSFUL;
3000
3001  /*
3002   * We do not care about the synchronization status since we will purge the
3003   * device later.
3004   */
3005  if (sync)
3006    rtems_bdbuf_syncdev (dd);
3007
3008  rtems_bdbuf_lock_cache ();
3009
3010  if (block_size > 0)
3011  {
3012    size_t bds_per_group = rtems_bdbuf_bds_per_group (block_size);
3013
3014    if (bds_per_group != 0)
3015    {
3016      int block_to_media_block_shift = 0;
3017      uint32_t media_blocks_per_block = block_size / dd->media_block_size;
3018      uint32_t one = 1;
3019
3020      while ((one << block_to_media_block_shift) < media_blocks_per_block)
3021      {
3022        ++block_to_media_block_shift;
3023      }
3024
3025      if ((dd->media_block_size << block_to_media_block_shift) != block_size)
3026        block_to_media_block_shift = -1;
3027
3028      dd->block_size = block_size;
3029      dd->block_count = dd->size / media_blocks_per_block;
3030      dd->media_blocks_per_block = media_blocks_per_block;
3031      dd->block_to_media_block_shift = block_to_media_block_shift;
3032      dd->bds_per_group = bds_per_group;
3033
3034      rtems_bdbuf_purge_dev (dd);
3035    }
3036    else
3037    {
3038      sc = RTEMS_INVALID_NUMBER;
3039    }
3040  }
3041  else
3042  {
3043    sc = RTEMS_INVALID_NUMBER;
3044  }
3045
3046  rtems_bdbuf_unlock_cache ();
3047
3048  return sc;
3049}
3050
3051static rtems_task
3052rtems_bdbuf_read_ahead_task (rtems_task_argument arg)
3053{
3054  rtems_chain_control *chain = &bdbuf_cache.read_ahead_chain;
3055
3056  while (bdbuf_cache.read_ahead_enabled)
3057  {
3058    rtems_chain_node *node;
3059
3060    rtems_bdbuf_wait_for_event (RTEMS_BDBUF_READ_AHEAD_WAKE_UP);
3061    rtems_bdbuf_lock_cache ();
3062
3063    while ((node = rtems_chain_get_unprotected (chain)) != NULL)
3064    {
3065      rtems_disk_device *dd = (rtems_disk_device *)
3066        ((char *) node - offsetof (rtems_disk_device, read_ahead.node));
3067      rtems_blkdev_bnum block = dd->read_ahead.next;
3068      rtems_blkdev_bnum media_block = 0;
3069      rtems_status_code sc =
3070        rtems_bdbuf_get_media_block (dd, block, &media_block);
3071
3072      rtems_chain_set_off_chain (&dd->read_ahead.node);
3073
3074      if (sc == RTEMS_SUCCESSFUL)
3075      {
3076        rtems_bdbuf_buffer *bd =
3077          rtems_bdbuf_get_buffer_for_read_ahead (dd, media_block);
3078
3079        if (bd != NULL)
3080        {
3081          uint32_t transfer_count = dd->block_count - block;
3082          uint32_t max_transfer_count = bdbuf_config.max_read_ahead_blocks;
3083
3084          if (transfer_count >= max_transfer_count)
3085          {
3086            transfer_count = max_transfer_count;
3087            dd->read_ahead.trigger = block + transfer_count / 2;
3088            dd->read_ahead.next = block + transfer_count;
3089          }
3090          else
3091          {
3092            dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
3093          }
3094
3095          ++dd->stats.read_ahead_transfers;
3096          rtems_bdbuf_execute_read_request (dd, bd, transfer_count);
3097        }
3098      }
3099      else
3100      {
3101        dd->read_ahead.trigger = RTEMS_DISK_READ_AHEAD_NO_TRIGGER;
3102      }
3103    }
3104
3105    rtems_bdbuf_unlock_cache ();
3106  }
3107
3108  rtems_task_delete (RTEMS_SELF);
3109}
3110
3111void rtems_bdbuf_get_device_stats (const rtems_disk_device *dd,
3112                                   rtems_blkdev_stats      *stats)
3113{
3114  rtems_bdbuf_lock_cache ();
3115  *stats = dd->stats;
3116  rtems_bdbuf_unlock_cache ();
3117}
3118
3119void rtems_bdbuf_reset_device_stats (rtems_disk_device *dd)
3120{
3121  rtems_bdbuf_lock_cache ();
3122  memset (&dd->stats, 0, sizeof(dd->stats));
3123  rtems_bdbuf_unlock_cache ();
3124}
Note: See TracBrowser for help on using the repository browser.