source: rtems/cpukit/libblock/src/bdbuf.c @ 4670d91

4.104.115
Last change on this file since 4670d91 was 4670d91, checked in by Joel Sherrill <joel.sherrill@…>, on 05/15/09 at 12:52:12

2009-05-15 Sebastian Huber <sebastian.huber@…>

  • Doxygen.in: Fixed project name. Added project number. Enabled auto brief. Disabled include graphs.
  • include/rtems/irq-extension.h, libblock/include/rtems/bdpart.h, libblock/include/rtems/bdbuf.h, libblock/include/rtems/bdpart.h, libblock/include/rtems/blkdev.h, libblock/include/rtems/diskdevs.h, libblock/include/rtems/ramdisk.h, libblock/src/bdbuf.c, libblock/src/blkdev.c, libblock/src/diskdevs.c, libblock/src/ramdisk.c: Documentation.
  • libblock/src/bdpart.c: Documentation. Fixed NULL pointer access.
  • Property mode set to 100644
File size: 62.5 KB
RevLine 
[57aa979]1/**
2 * @file
3 *
[4670d91]4 * @ingroup rtems_bdbuf
5 *
[57aa979]6 * Block device buffer management.
7 */
8
[e51bd96]9/*
10 * Disk I/O buffering
 11 * Buffer management
12 *
13 * Copyright (C) 2001 OKTET Ltd., St.-Peterburg, Russia
14 * Author: Andrey G. Ivanov <Andrey.Ivanov@oktet.ru>
15 *         Victor V. Vengerov <vvv@oktet.ru>
[df6348bb]16 *         Alexander Kukuta <kam@oktet.ru>
[e51bd96]17 *
[c21c850e]18 * Copyright (C) 2008 Chris Johns <chrisj@rtems.org>
19 *    Rewritten to remove score mutex access. Fixes many performance
20 *    issues.
21 *
[3d14a45]22 * @(#) bdbuf.c,v 1.14 2004/04/17 08:15:17 ralf Exp
[e51bd96]23 */
24
[3899a537]25/**
26 * Set to 1 to enable debug tracing.
27 */
28#define RTEMS_BDBUF_TRACE 0
29
[006fa1ef]30#if HAVE_CONFIG_H
31#include "config.h"
32#endif
33
[e51bd96]34#include <rtems.h>
[3899a537]35#include <rtems/error.h>
[57aa979]36#include <rtems/malloc.h>
[e51bd96]37#include <limits.h>
38#include <errno.h>
39#include <assert.h>
40
[3899a537]41#if RTEMS_BDBUF_TRACE
42#include <stdio.h>
[0ebfac19]43#endif
44
[3899a537]45#include "rtems/bdbuf.h"
[e51bd96]46
/**
 * The BD buffer context. One global instance (rtems_bdbuf_ctx below) holds
 * the table of buffer pools and the identity of the swapout task.
 */
typedef struct rtems_bdbuf_context {
  rtems_bdbuf_pool* pool;      /**< Table of buffer pools */
  int               npools;    /**< Number of entries in pool table */
  rtems_id          swapout;   /**< Swapout task ID */
  bool              swapout_enabled; /**< True when the swapout task may run */
} rtems_bdbuf_context;

/**
 * Fatal errors. Each code is tagged with 'B' in the top byte so bdbuf
 * failures are recognisable in a fatal-error report.
 */
#define RTEMS_BLKDEV_FATAL_ERROR(n) \
  (((uint32_t)'B' << 24) | ((uint32_t)(n) & (uint32_t)0x00FFFFFF))

#define RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY RTEMS_BLKDEV_FATAL_ERROR(1)
#define RTEMS_BLKDEV_FATAL_BDBUF_SWAPOUT     RTEMS_BLKDEV_FATAL_ERROR(2)
#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK   RTEMS_BLKDEV_FATAL_ERROR(3)
#define RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK RTEMS_BLKDEV_FATAL_ERROR(4)
#define RTEMS_BLKDEV_FATAL_BDBUF_POOL_LOCK   RTEMS_BLKDEV_FATAL_ERROR(5)
#define RTEMS_BLKDEV_FATAL_BDBUF_POOL_UNLOCK RTEMS_BLKDEV_FATAL_ERROR(6)
#define RTEMS_BLKDEV_FATAL_BDBUF_POOL_WAIT   RTEMS_BLKDEV_FATAL_ERROR(7)
#define RTEMS_BLKDEV_FATAL_BDBUF_POOL_WAKE   RTEMS_BLKDEV_FATAL_ERROR(8)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE     RTEMS_BLKDEV_FATAL_ERROR(9)
#define RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM    RTEMS_BLKDEV_FATAL_ERROR(10)
#define BLKDEV_FATAL_BDBUF_SWAPOUT_RE        RTEMS_BLKDEV_FATAL_ERROR(11)
#define BLKDEV_FATAL_BDBUF_SWAPOUT_TS        RTEMS_BLKDEV_FATAL_ERROR(12)

/**
 * The events used in this code. These should be system events rather than
 * application events.
 */
#define RTEMS_BDBUF_TRANSFER_SYNC  RTEMS_EVENT_1
#define RTEMS_BDBUF_SWAPOUT_SYNC   RTEMS_EVENT_2

/**
 * The swap out task size. Should be more than enough for most drivers with
 * tracing turned on.
 */
#define SWAPOUT_TASK_STACK_SIZE (8 * 1024)

/**
 * Lock semaphore attributes. This is used for locking type mutexes.
 *
 * @warning Priority inheritance is on.
 */
#define RTEMS_BDBUF_POOL_LOCK_ATTRIBS \
  (RTEMS_PRIORITY | RTEMS_BINARY_SEMAPHORE | \
   RTEMS_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)

/**
 * Waiter semaphore attributes.
 *
 * @warning Do not configure as inherit priority. If a driver is in the driver
 *          initialisation table this locked semaphore will have the IDLE task
 *          as the holder and a blocking task will raise the priority of the
 *          IDLE task which can cause unusual side effects.
 */
#define RTEMS_BDBUF_POOL_WAITER_ATTRIBS \
  (RTEMS_PRIORITY | RTEMS_SIMPLE_BINARY_SEMAPHORE | \
   RTEMS_NO_INHERIT_PRIORITY | RTEMS_NO_PRIORITY_CEILING | RTEMS_LOCAL)

/*
 * The swap out task (forward declaration; defined later in this file).
 */
static rtems_task rtems_bdbuf_swapout_task(rtems_task_argument arg);

/**
 * The context of the buffering layer.
 */
static rtems_bdbuf_context rtems_bdbuf_ctx;
[e51bd96]119
/**
 * Print a message to the bdbuf trace output and flush it. Output is only
 * produced when the global rtems_bdbuf_tracer flag is set at run time.
 *
 * @param format The format string. See printf for details.
 * @param ... The arguments for the format text.
 */
#if RTEMS_BDBUF_TRACE
bool rtems_bdbuf_tracer;
static void
rtems_bdbuf_printf (const char *format, ...)
{
  va_list args;
  va_start (args, format);
  if (rtems_bdbuf_tracer)
  {
    fprintf (stdout, "bdbuf:");
    vfprintf (stdout, format, args);
    fflush (stdout);
  }
  /* Every va_start must be paired with va_end (C99 7.15.1). */
  va_end (args);
}
#endif
142
/**
 * The default maximum height of 32 allows for AVL trees having between
 * 5,704,880 and 4,294,967,295 nodes, depending on order of insertion.  You may
 * change this compile-time constant as you wish.  It bounds the size of the
 * parent stacks used by the iterative insert/remove routines below.
 */
#ifndef RTEMS_BDBUF_AVL_MAX_HEIGHT
#define RTEMS_BDBUF_AVL_MAX_HEIGHT (32)
#endif
151
[3899a537]152/**
153 * Searches for the node with specified dev/block.
[e51bd96]154 *
[3899a537]155 * @param root pointer to the root node of the AVL-Tree
156 * @param dev device search key
157 * @param block block search key
158 * @retval NULL node with the specified dev/block is not found
159 * @return pointer to the node with specified dev/block
[e51bd96]160 */
[3899a537]161static rtems_bdbuf_buffer *
162rtems_bdbuf_avl_search (rtems_bdbuf_buffer** root,
163                        dev_t                dev,
164                        rtems_blkdev_bnum    block)
[e51bd96]165{
[3899a537]166  rtems_bdbuf_buffer* p = *root;
[df6348bb]167
[3899a537]168  while ((p != NULL) && ((p->dev != dev) || (p->block != block)))
169  {
170    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
[e51bd96]171    {
[3899a537]172      p = p->avl.right;
173    }
174    else
175    {
176      p = p->avl.left;
[e51bd96]177    }
[3899a537]178  }
[048dcd2b]179
[3899a537]180  return p;
[e51bd96]181}
182
/**
 * Insert the specified node into the AVL tree keyed by (dev, block).
 *
 * The algorithm is iterative: the path from the root to the insertion point
 * is recorded in buf_stack (the avl.cache field of each visited node encodes
 * which child was taken: +1 = right, -1 = left), then the path is walked
 * back up performing the standard AVL rebalancing rotations.
 *
 * @param root pointer to the root node of the AVL-Tree
 * @param node Pointer to the node to add.
 * @retval 0 The node added successfully
 * @retval -1 An error occured (a node with the same dev/block already exists)
 */
static int
rtems_bdbuf_avl_insert(rtems_bdbuf_buffer** root,
                       rtems_bdbuf_buffer*  node)
{
  dev_t             dev = node->dev;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  /* Stack of ancestors of the insertion point, root first. */
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  /* Empty tree: the new node becomes the root. */
  if (p == NULL)
  {
    *root = node;
    node->avl.left = NULL;
    node->avl.right = NULL;
    node->avl.bal = 0;
    return 0;
  }

  /* Descend to the insertion point, recording the path. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
    {
      p->avl.cache = 1;       /* went right */
      q = p->avl.right;
      if (q == NULL)
      {
        q = node;
        p->avl.right = q = node;
        break;
      }
    }
    else if ((p->dev != dev) || (p->block != block))
    {
      p->avl.cache = -1;      /* went left */
      q = p->avl.left;
      if (q == NULL)
      {
        q = node;
        p->avl.left = q;
        break;
      }
    }
    else
    {
      /* Duplicate key: refuse the insert. */
      return -1;
    }

    p = q;
  }

  /* q is the freshly linked node; initialise it as a leaf. */
  q->avl.left = q->avl.right = NULL;
  q->avl.bal = 0;
  modified = true;
  buf_prev--;

  /* Walk back up the recorded path, rebalancing until the subtree height
   * no longer changes (modified becomes false). */
  while (modified)
  {
    if (p->avl.cache == -1)
    {
      /* Insertion happened in the left subtree of p. */
      switch (p->avl.bal)
      {
        case 1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = -1;
          break;

        case -1:
          p1 = p->avl.left;
          if (p1->avl.bal == -1) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;
            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;
            if (p2->avl.bal == -1) p->avl.bal = +1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    else
    {
      /* Insertion happened in the right subtree of p (mirror case). */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          modified = false;
          break;

        case 0:
          p->avl.bal = 1;
          break;

        case 1:
          p1 = p->avl.right;
          if (p1->avl.bal == 1) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;
            p->avl.bal = 0;
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;
            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;
            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = +1; else p1->avl.bal = 0;
            p = p2;
          }
          p->avl.bal = 0;
          modified = false;
          break;

        default:
          break;
      }
    }
    /* Reconnect the (possibly rotated) subtree to its parent, or install
     * it as the new root when the stack is exhausted. */
    q = p;
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;

      if (p->avl.cache == -1)
      {
        p->avl.left = q;
      }
      else
      {
        p->avl.right = q;
      }
    }
    else
    {
      *root = p;
      break;
    }
  };

  return 0;
}
362
363
/**
 * Remove the node with the given (dev, block) key from the AVL tree.
 *
 * As with insert, the descent path is recorded in buf_stack (avl.cache of
 * each visited node encodes the direction taken: +1 = right, -1 = left) and
 * the tree is rebalanced bottom-up after the unlink.
 *
 * @param root Pointer to pointer to the root node
 * @param node Pointer to the node to remove
 * @retval 0 Item removed
 * @retval -1 No such item found
 */
static int
rtems_bdbuf_avl_remove(rtems_bdbuf_buffer**      root,
                       const rtems_bdbuf_buffer* node)
{
  dev_t             dev = node->dev;
  rtems_blkdev_bnum block = node->block;

  rtems_bdbuf_buffer*  p = *root;
  rtems_bdbuf_buffer*  q;
  rtems_bdbuf_buffer*  r;
  rtems_bdbuf_buffer*  s;
  rtems_bdbuf_buffer*  p1;
  rtems_bdbuf_buffer*  p2;
  /* Stack of ancestors of the removed node, root first. */
  rtems_bdbuf_buffer*  buf_stack[RTEMS_BDBUF_AVL_MAX_HEIGHT];
  rtems_bdbuf_buffer** buf_prev = buf_stack;

  bool modified = false;

  memset (buf_stack, 0, sizeof(buf_stack));

  /* Descend to the node to delete, recording the path taken. */
  while (p != NULL)
  {
    *buf_prev++ = p;

    if ((p->dev < dev) || ((p->dev == dev) && (p->block < block)))
    {
      p->avl.cache = 1;
      p = p->avl.right;
    }
    else if ((p->dev != dev) || (p->block != block))
    {
      p->avl.cache = -1;
      p = p->avl.left;
    }
    else
    {
      /* node found */
      break;
    }
  }

  if (p == NULL)
  {
    /* there is no such node */
    return -1;
  }

  q = p;

  buf_prev--;
  if (buf_prev > buf_stack)
  {
    p = *(buf_prev - 1);
  }
  else
  {
    p = NULL;
  }

  /* at this moment q - is a node to delete, p is q's parent */
  if (q->avl.right == NULL)
  {
    /* No right child: splice q's left subtree into its place. */
    r = q->avl.left;
    if (r != NULL)
    {
      r->avl.bal = 0;
    }
    q = r;
  }
  else
  {
    rtems_bdbuf_buffer **t;

    r = q->avl.right;

    if (r->avl.left == NULL)
    {
      /* Right child has no left subtree: it replaces q directly. */
      r->avl.left = q->avl.left;
      r->avl.bal = q->avl.bal;
      r->avl.cache = 1;
      *buf_prev++ = q = r;
    }
    else
    {
      /* General case: find q's in-order successor s (leftmost node of the
       * right subtree) and move it into q's position. */
      t = buf_prev++;
      s = r;

      while (s->avl.left != NULL)
      {
        *buf_prev++ = r = s;
        s = r->avl.left;
        r->avl.cache = -1;
      }

      s->avl.left = q->avl.left;
      r->avl.left = s->avl.right;
      s->avl.right = q->avl.right;
      s->avl.bal = q->avl.bal;
      s->avl.cache = 1;

      *t = q = s;
    }
  }

  /* Reattach the replacement subtree to q's former parent (or the root). */
  if (p != NULL)
  {
    if (p->avl.cache == -1)
    {
      p->avl.left = q;
    }
    else
    {
      p->avl.right = q;
    }
  }
  else
  {
    *root = q;
  }

  modified = true;

  /* Walk back up the recorded path, rebalancing while subtree heights
   * keep shrinking (modified stays true). */
  while (modified)
  {
    if (buf_prev > buf_stack)
    {
      p = *--buf_prev;
    }
    else
    {
      break;
    }

    if (p->avl.cache == -1)
    {
      /* rebalance left branch */
      switch (p->avl.bal)
      {
        case -1:
          p->avl.bal = 0;
          break;
        case  0:
          p->avl.bal = 1;
          modified = false;
          break;

        case +1:
          p1 = p->avl.right;

          if (p1->avl.bal >= 0) /* simple RR-turn */
          {
            p->avl.right = p1->avl.left;
            p1->avl.left = p;

            if (p1->avl.bal == 0)
            {
              p1->avl.bal = -1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double RL-turn */
          {
            p2 = p1->avl.left;

            p1->avl.left = p2->avl.right;
            p2->avl.right = p1;
            p->avl.right = p2->avl.left;
            p2->avl.left = p;

            if (p2->avl.bal == +1) p->avl.bal = -1; else p->avl.bal = 0;
            if (p2->avl.bal == -1) p1->avl.bal = 1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }
    else
    {
      /* rebalance right branch */
      switch (p->avl.bal)
      {
        case +1:
          p->avl.bal = 0;
          break;

        case  0:
          p->avl.bal = -1;
          modified = false;
          break;

        case -1:
          p1 = p->avl.left;

          if (p1->avl.bal <= 0) /* simple LL-turn */
          {
            p->avl.left = p1->avl.right;
            p1->avl.right = p;
            if (p1->avl.bal == 0)
            {
              p1->avl.bal = 1;
              modified = false;
            }
            else
            {
              p->avl.bal = 0;
              p1->avl.bal = 0;
            }
            p = p1;
          }
          else /* double LR-turn */
          {
            p2 = p1->avl.right;

            p1->avl.right = p2->avl.left;
            p2->avl.left = p1;
            p->avl.left = p2->avl.right;
            p2->avl.right = p;

            if (p2->avl.bal == -1) p->avl.bal = 1; else p->avl.bal = 0;
            if (p2->avl.bal == +1) p1->avl.bal = -1; else p1->avl.bal = 0;

            p = p2;
            p2->avl.bal = 0;
          }
          break;

        default:
          break;
      }
    }

    /* Reconnect the (possibly rotated) subtree to its parent, or install
     * it as the new root when the stack is exhausted. */
    if (buf_prev > buf_stack)
    {
      q = *(buf_prev - 1);

      if (q->avl.cache == -1)
      {
        q->avl.left = p;
      }
      else
      {
        q->avl.right = p;
      }
    }
    else
    {
      *root = p;
      break;
    }

  }

  return 0;
}
637
638/**
639 * Get the pool for the device.
640 *
[57aa979]641 * @param pid Physical disk device.
[3899a537]642 */
643static rtems_bdbuf_pool*
644rtems_bdbuf_get_pool (const rtems_bdpool_id pid)
645{
646  return &rtems_bdbuf_ctx.pool[pid];
647}
648
649/**
650 * Lock the pool. A single task can nest calls.
651 *
652 * @param pool The pool to lock.
653 */
654static void
655rtems_bdbuf_lock_pool (rtems_bdbuf_pool* pool)
656{
657  rtems_status_code sc = rtems_semaphore_obtain (pool->lock,
658                                                 RTEMS_WAIT,
659                                                 RTEMS_NO_TIMEOUT);
660  if (sc != RTEMS_SUCCESSFUL)
661    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_POOL_LOCK);
662}
663
664/**
665 * Unlock the pool.
666 *
667 * @param pool The pool to unlock.
668 */
669static void
670rtems_bdbuf_unlock_pool (rtems_bdbuf_pool* pool)
671{
672  rtems_status_code sc = rtems_semaphore_release (pool->lock);
673  if (sc != RTEMS_SUCCESSFUL)
674    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_POOL_UNLOCK);
675}
676
677/**
678 * Lock the pool's sync. A single task can nest calls.
679 *
680 * @param pool The pool's sync to lock.
681 */
682static void
683rtems_bdbuf_lock_sync (rtems_bdbuf_pool* pool)
684{
685  rtems_status_code sc = rtems_semaphore_obtain (pool->sync_lock,
686                                                 RTEMS_WAIT,
687                                                 RTEMS_NO_TIMEOUT);
688  if (sc != RTEMS_SUCCESSFUL)
689    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SYNC_LOCK);
690}
691
692/**
693 * Unlock the pool's sync.
694 *
695 * @param pool The pool's sync to unlock.
696 */
697static void
698rtems_bdbuf_unlock_sync (rtems_bdbuf_pool* pool)
699{
700  rtems_status_code sc = rtems_semaphore_release (pool->sync_lock);
701  if (sc != RTEMS_SUCCESSFUL)
702    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SYNC_UNLOCK);
703}
704
/**
 * Wait until woken. Semaphores are used so a number of tasks can wait and can
 * be woken at once. Task events would require we maintain a list of tasks to
 * be woken and this would require storage and we do not know the number of
 * tasks that could be waiting.
 *
 * While we have the pool locked we can try and claim the semaphore and
 * therefore know when we release the lock to the pool we will block until the
 * semaphore is released. This may even happen before we get to block.
 *
 * A counter is used to save the release call when no one is waiting.
 *
 * The function assumes the pool is locked on entry and it will be locked on
 * exit.
 *
 * @param pool The pool to wait for a buffer to return.
 * @param sema The semaphore to block on and wait.
 * @param waiters The wait counter for this semaphore.
 */
static void
rtems_bdbuf_wait (rtems_bdbuf_pool* pool, rtems_id* sema,
                  volatile uint32_t* waiters)
{
  rtems_status_code sc;
  rtems_mode        prev_mode;
 
  /*
   * Indicate we are waiting.
   */
  *waiters += 1;

  /*
   * Disable preemption then unlock the pool and block.
   * There is no POSIX condition variable in the core API so
   * this is a work around.
   *
   * The issue is a task could preempt after the pool is unlocked
   * because it is blocking or just hits that window, and before
   * this task has blocked on the semaphore. If the preempting task
   * flushes the queue this task will not see the flush and may
   * block for ever or until another transaction flushes this
   * semaphore.
   */
  sc = rtems_task_mode (RTEMS_NO_PREEMPT, RTEMS_PREEMPT_MASK, &prev_mode);

  if (sc != RTEMS_SUCCESSFUL)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_POOL_WAIT);
 
  /*
   * Unlock the pool, wait, and lock the pool when we return.
   */
  rtems_bdbuf_unlock_pool (pool);

  sc = rtems_semaphore_obtain (*sema, RTEMS_WAIT, RTEMS_NO_TIMEOUT);
 
  /*
   * Waiters are woken by rtems_bdbuf_wake which flushes the semaphore;
   * a flushed obtain returns RTEMS_UNSATISFIED, so that is the only
   * status we expect here. Anything else is a fatal inconsistency.
   */
  if (sc != RTEMS_UNSATISFIED)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_POOL_WAIT);
 
  rtems_bdbuf_lock_pool (pool);

  /* Restore the task's previous preemption mode. */
  sc = rtems_task_mode (prev_mode, RTEMS_ALL_MODE_MASKS, &prev_mode);

  if (sc != RTEMS_SUCCESSFUL)
    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_POOL_WAIT);
 
  *waiters -= 1;
}
772
773/**
774 * Wake a blocked resource. The resource has a counter that lets us know if
775 * there are any waiters.
776 *
777 * @param sema The semaphore to release.
778 * @param waiters The wait counter for this semaphore.
779 */
780static void
781rtems_bdbuf_wake (rtems_id sema, volatile uint32_t* waiters)
782{
783  if (*waiters)
784  {
785    rtems_status_code sc;
786
787    sc = rtems_semaphore_flush (sema);
788 
789    if (sc != RTEMS_SUCCESSFUL)
790      rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_POOL_WAKE);
791  }
792}
793
/**
 * Add a buffer descriptor to the modified list. This modified list is treated
 * a little differently to the other lists. To access it you must have the
 * pool locked and this is assumed to be the case on entry to this call.
 *
 * If the pool has a device being sync'ed and the bd is for that device the
 * call must block and wait until the sync is over before adding the bd to the
 * modified list. Once a sync happens for a device no bd's can be added to the
 * modified list. The disk image is forced to be a snapshot at that moment in
 * time. The sync lock is used to block writes while a sync is active.
 *
 * @param pool The pool the bd belongs to.
 * @param bd The bd to queue to the pool's modified list.
 */
static void
rtems_bdbuf_append_modified (rtems_bdbuf_pool* pool, rtems_bdbuf_buffer* bd)
{
  /*
   * If the pool has a device being sync'ed check if this bd is for that
   * device. If it is, unlock the pool and block on the sync lock; once we
   * have the sync lock the sync has finished, so release it again and
   * re-take the pool lock before queuing the bd.
   */
  if (pool->sync_active && (pool->sync_device == bd->dev))
  {
    rtems_bdbuf_unlock_pool (pool);
    rtems_bdbuf_lock_sync (pool);
    rtems_bdbuf_unlock_sync (pool);
    rtems_bdbuf_lock_pool (pool);
  }
     
  bd->state = RTEMS_BDBUF_STATE_MODIFIED;

  rtems_chain_append (&pool->modified, &bd->link);
}
834
[c21c850e]835/**
836 * Wait the swapper task.
837 */
[3899a537]838static void
[a5fb40cf]839rtems_bdbuf_wake_swapper (void)
[3899a537]840{
841  rtems_status_code sc = rtems_event_send (rtems_bdbuf_ctx.swapout,
842                                           RTEMS_BDBUF_SWAPOUT_SYNC);
843  if (sc != RTEMS_SUCCESSFUL)
844    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_WAKE);
[e51bd96]845}
846
[c21c850e]847/**
848 * Initialize single buffer pool.
[e51bd96]849 *
[c21c850e]850 * @param config Buffer pool configuration
851 * @param pid Pool number
[e51bd96]852 *
[c21c850e]853 * @return RTEMS_SUCCESSFUL, if buffer pool initialized successfully, or error
854 *         code if error occured.
[e51bd96]855 */
856static rtems_status_code
[3899a537]857rtems_bdbuf_initialize_pool (rtems_bdbuf_pool_config* config,
858                             rtems_bdpool_id          pid)
[e51bd96]859{
[57aa979]860  int                 rv = 0;
[3899a537]861  unsigned char*      buffer = config->mem_area;
862  rtems_bdbuf_pool*   pool;
863  rtems_bdbuf_buffer* bd;
864  rtems_status_code   sc;
[cec5c069]865  uint32_t            b;
[57aa979]866  int                 cache_aligment = 32 /* FIXME rtems_cache_get_data_line_size() */;
867
868  /* For unspecified cache alignments we use the CPU alignment */
869  if (cache_aligment <= 0)
870  {
871    cache_aligment = CPU_ALIGNMENT;
872  }
[048dcd2b]873
[3899a537]874  pool = rtems_bdbuf_get_pool (pid);
875 
[c21c850e]876  pool->blksize        = config->size;
877  pool->nblks          = config->num;
878  pool->flags          = 0;
[4f971343]879  pool->sync_active    = false;
[c21c850e]880  pool->sync_device    = -1;
881  pool->sync_requester = 0;
882  pool->tree           = NULL;
883  pool->buffers        = NULL;
[3899a537]884
885  rtems_chain_initialize_empty (&pool->ready);
886  rtems_chain_initialize_empty (&pool->lru);
887  rtems_chain_initialize_empty (&pool->modified);
888  rtems_chain_initialize_empty (&pool->sync);
889
[c21c850e]890  pool->access           = 0;
891  pool->access_waiters   = 0;
892  pool->transfer         = 0;
[3899a537]893  pool->transfer_waiters = 0;
[c21c850e]894  pool->waiting          = 0;
895  pool->wait_waiters     = 0;
[3899a537]896 
897  /*
898   * Allocate memory for buffer descriptors
899   */
900  pool->bds = calloc (config->num, sizeof (rtems_bdbuf_buffer));
901 
902  if (!pool->bds)
903    return RTEMS_NO_MEMORY;
904
905  /*
[57aa979]906   * Allocate memory for buffers if required.  The pool memory will be cache
907   * aligned.  It is possible to free the memory allocated by rtems_memalign()
908   * with free().
[3899a537]909   */
910  if (buffer == NULL)
911  {
[57aa979]912    rv = rtems_memalign ((void **) &buffer,
913                         cache_aligment,
914                         config->num * config->size);
915    if (rv != 0)
[e51bd96]916    {
[3899a537]917      free (pool->bds);
918      return RTEMS_NO_MEMORY;
[e51bd96]919    }
[57aa979]920    pool->buffers = buffer;
[3899a537]921  }
[048dcd2b]922
[3899a537]923  for (b = 0, bd = pool->bds;
924       b < pool->nblks;
925       b++, bd++, buffer += pool->blksize)
926  {
927    bd->dev        = -1;
928    bd->block      = 0;
929    bd->buffer     = buffer;
930    bd->avl.left   = NULL;
931    bd->avl.right  = NULL;
932    bd->state      = RTEMS_BDBUF_STATE_EMPTY;
933    bd->pool       = pid;
934    bd->error      = 0;
935    bd->waiters    = 0;
936    bd->hold_timer = 0;
937   
938    rtems_chain_append (&pool->ready, &bd->link);
939  }
[048dcd2b]940
[3899a537]941  sc = rtems_semaphore_create (rtems_build_name ('B', 'P', '0' + pid, 'L'),
942                               1, RTEMS_BDBUF_POOL_LOCK_ATTRIBS, 0,
943                               &pool->lock);
944  if (sc != RTEMS_SUCCESSFUL)
945  {
946    free (pool->buffers);
947    free (pool->bds);
948    return sc;
949  }
[048dcd2b]950
[c21c850e]951  sc = rtems_semaphore_create (rtems_build_name ('B', 'P', '0' + pid, 'S'),
952                               1, RTEMS_BDBUF_POOL_LOCK_ATTRIBS, 0,
[3899a537]953                               &pool->sync_lock);
954  if (sc != RTEMS_SUCCESSFUL)
955  {
956    rtems_semaphore_delete (pool->lock);
957    free (pool->buffers);
958    free (pool->bds);
959    return sc;
960  }
961 
962  sc = rtems_semaphore_create (rtems_build_name ('B', 'P', '0' + pid, 'a'),
963                               0, RTEMS_BDBUF_POOL_WAITER_ATTRIBS, 0,
964                               &pool->access);
965  if (sc != RTEMS_SUCCESSFUL)
966  {
967    rtems_semaphore_delete (pool->sync_lock);
968    rtems_semaphore_delete (pool->lock);
969    free (pool->buffers);
970    free (pool->bds);
971    return sc;
972  }
[048dcd2b]973
[3899a537]974  sc = rtems_semaphore_create (rtems_build_name ('B', 'P', '0' + pid, 't'),
975                               0, RTEMS_BDBUF_POOL_WAITER_ATTRIBS, 0,
976                               &pool->transfer);
977  if (sc != RTEMS_SUCCESSFUL)
978  {
979    rtems_semaphore_delete (pool->access);
980    rtems_semaphore_delete (pool->sync_lock);
981    rtems_semaphore_delete (pool->lock);
982    free (pool->buffers);
983    free (pool->bds);
984    return sc;
985  }
986
987  sc = rtems_semaphore_create (rtems_build_name ('B', 'P', '0' + pid, 'w'),
988                               0, RTEMS_BDBUF_POOL_WAITER_ATTRIBS, 0,
989                               &pool->waiting);
990  if (sc != RTEMS_SUCCESSFUL)
991  {
992    rtems_semaphore_delete (pool->transfer);
993    rtems_semaphore_delete (pool->access);
994    rtems_semaphore_delete (pool->sync_lock);
995    rtems_semaphore_delete (pool->lock);
996    free (pool->buffers);
997    free (pool->bds);
998    return sc;
999  }
1000
1001  return RTEMS_SUCCESSFUL;
[e51bd96]1002}
1003
[c21c850e]1004/**
1005 * Free resources allocated for buffer pool with specified number.
[e51bd96]1006 *
[c21c850e]1007 * @param pid Buffer pool number
[e51bd96]1008 *
[c21c850e]1009 * @retval RTEMS_SUCCESSFUL
[e51bd96]1010 */
1011static rtems_status_code
[3899a537]1012rtems_bdbuf_release_pool (rtems_bdpool_id pid)
[e51bd96]1013{
[3899a537]1014  rtems_bdbuf_pool* pool = rtems_bdbuf_get_pool (pid);
1015 
1016  rtems_bdbuf_lock_pool (pool);
1017
1018  rtems_semaphore_delete (pool->waiting);
1019  rtems_semaphore_delete (pool->transfer);
1020  rtems_semaphore_delete (pool->access);
1021  rtems_semaphore_delete (pool->lock);
1022 
1023  free (pool->buffers);
1024  free (pool->bds);
1025 
1026  return RTEMS_SUCCESSFUL;
[e51bd96]1027}
1028
1029rtems_status_code
[a5fb40cf]1030rtems_bdbuf_init (void)
[e51bd96]1031{
[3899a537]1032  rtems_bdpool_id   p;
1033  rtems_status_code sc;
[e51bd96]1034
[3899a537]1035#if RTEMS_BDBUF_TRACE
1036  rtems_bdbuf_printf ("init\n");
1037#endif
[048dcd2b]1038
[3899a537]1039  if (rtems_bdbuf_pool_configuration_size <= 0)
1040    return RTEMS_INVALID_SIZE;
[e51bd96]1041
[3899a537]1042  if (rtems_bdbuf_ctx.npools)
1043    return RTEMS_RESOURCE_IN_USE;
[e51bd96]1044
[3899a537]1045  rtems_bdbuf_ctx.npools = rtems_bdbuf_pool_configuration_size;
[048dcd2b]1046
[3899a537]1047  /*
1048   * Allocate memory for buffer pool descriptors
1049   */
1050  rtems_bdbuf_ctx.pool = calloc (rtems_bdbuf_pool_configuration_size,
1051                                 sizeof (rtems_bdbuf_pool));
1052 
1053  if (rtems_bdbuf_ctx.pool == NULL)
1054    return RTEMS_NO_MEMORY;
1055
1056  /*
1057   * Initialize buffer pools and roll out if something failed,
1058   */
1059  for (p = 0; p < rtems_bdbuf_ctx.npools; p++)
1060  {
1061    sc = rtems_bdbuf_initialize_pool (&rtems_bdbuf_pool_configuration[p], p);
1062    if (sc != RTEMS_SUCCESSFUL)
[e51bd96]1063    {
[3899a537]1064      rtems_bdpool_id j;
1065      for (j = 0; j < p - 1; j++)
1066        rtems_bdbuf_release_pool (j);
1067      return sc;
[e51bd96]1068    }
[3899a537]1069  }
[e51bd96]1070
[3899a537]1071  /*
1072   * Create and start swapout task
1073   */
[048dcd2b]1074
[4f971343]1075  rtems_bdbuf_ctx.swapout_enabled = true;
[3899a537]1076 
1077  sc = rtems_task_create (rtems_build_name('B', 'S', 'W', 'P'),
1078                          (rtems_bdbuf_configuration.swapout_priority ?
1079                           rtems_bdbuf_configuration.swapout_priority :
1080                           RTEMS_BDBUF_SWAPOUT_TASK_PRIORITY_DEFAULT),
1081                          SWAPOUT_TASK_STACK_SIZE,
1082                          RTEMS_PREEMPT | RTEMS_NO_TIMESLICE | RTEMS_NO_ASR,
1083                          RTEMS_LOCAL | RTEMS_NO_FLOATING_POINT,
1084                          &rtems_bdbuf_ctx.swapout);
1085  if (sc != RTEMS_SUCCESSFUL)
1086  {
1087    for (p = 0; p < rtems_bdbuf_ctx.npools; p++)
1088      rtems_bdbuf_release_pool (p);
1089    free (rtems_bdbuf_ctx.pool);
1090    return sc;
1091  }
[e51bd96]1092
[3899a537]1093  sc = rtems_task_start (rtems_bdbuf_ctx.swapout,
1094                         rtems_bdbuf_swapout_task,
1095                         (rtems_task_argument) &rtems_bdbuf_ctx);
1096  if (sc != RTEMS_SUCCESSFUL)
1097  {
1098    rtems_task_delete (rtems_bdbuf_ctx.swapout);
1099    for (p = 0; p < rtems_bdbuf_ctx.npools; p++)
1100      rtems_bdbuf_release_pool (p);
1101    free (rtems_bdbuf_ctx.pool);
1102    return sc;
1103  }
[e51bd96]1104
[3899a537]1105  return RTEMS_SUCCESSFUL;
[e51bd96]1106}
1107
[3899a537]1108/**
 * Get a buffer for this device and block. This function returns a buffer once
 * placed into the AVL tree. If no buffer is available and it is not a read
 * ahead request and no buffers are waiting to be written to disk wait until
 * one is available. If buffers are waiting to be written to disk and none are
 * available expire the hold timer and wake the swap out task. If the buffer is
 * for a read ahead transfer return NULL if there is no buffer or it is in the
 * cache.
[e51bd96]1116 *
[3899a537]1117 * The AVL tree of buffers for the pool is searched and if not located check
1118 * obtain a buffer and insert it into the AVL tree. Buffers are first obtained
[c21c850e]1119 * from the ready list until all empty/ready buffers are used. Once all buffers
1120 * are in use buffers are taken from the LRU list with the least recently used
[3899a537]1121 * buffer taken first. A buffer taken from the LRU list is removed from the AVL
[c21c850e]1122 * tree. The ready list or LRU list buffer is initialised to this device and
1123 * block. If no buffers are available due to the ready and LRU lists being
1124 * empty a check is made of the modified list. Buffers may be queued waiting
1125 * for the hold timer to expire. These buffers should be written to disk and
1126 * returned to the LRU list where they can be used rather than this call
1127 * blocking. If buffers are on the modified list the max. write block size of
1128 * buffers have their hold timer expired and the swap out task woken. The
1129 * caller then blocks on the waiting semaphore and counter. When buffers return
1130 * from the upper layers (access) or lower driver (transfer) the blocked caller
1131 * task is woken and this procedure is repeated. The repeat handles a case of a
1132 * another thread pre-empting getting a buffer first and adding it to the AVL
1133 * tree.
[3899a537]1134 *
1135 * A buffer located in the AVL tree means it is already in the cache and maybe
1136 * in use somewhere. The buffer can be either:
[e51bd96]1137 *
[3899a537]1138 * # Cached. Not being accessed or part of a media transfer.
 * # Access or modified access. Is with an upper layer being accessed.
[3899a537]1140 * # Transfer. Is with the driver and part of a media transfer.
1141 *
[c21c850e]1142 * If cached we assign the new state, extract it from any list it maybe part of
1143 * and return to the user.
[3899a537]1144 *
1145 * This function assumes the pool the buffer is being taken from is locked and
[c21c850e]1146 * it will make sure the pool is locked when it returns. The pool will be
1147 * unlocked if the call could block.
[3899a537]1148 *
[57aa979]1149 * @param pdd The physical disk device
[c21c850e]1150 * @param pool The pool reference
1151 * @param block Absolute media block number
1152 * @param read_ahead The get is for a read ahead buffer
[e51bd96]1153 *
 * @return RTEMS status code (RTEMS_SUCCESSFUL if the operation completed
 *         successfully or an error code if an error occurred)
[e51bd96]1156 */
static rtems_bdbuf_buffer*
rtems_bdbuf_get_buffer (rtems_disk_device* pdd,
                        rtems_bdbuf_pool*  pool,
                        rtems_blkdev_bnum  block,
                        bool               read_ahead)
{
  /* Buffers are keyed on the physical device number. */
  dev_t               device = pdd->dev;
  rtems_bdbuf_buffer* bd;
  bool                available;

  /*
   * Loop until we get a buffer. Under load we could find no buffers are
   * available requiring this task to wait until some become available before
   * proceeding. There is no timeout. If the call is to block and the buffer is
   * for a read ahead buffer return NULL.
   *
   * The search procedure is repeated as another thread could have pre-empted
   * us while we waited for a buffer, obtained an empty buffer and loaded the
   * AVL tree with the one we are after.
   */
  do
  {
    /*
     * Search for buffer descriptor for this dev/block key.
     */
    bd = rtems_bdbuf_avl_search (&pool->tree, device, block);

    /*
     * No buffer in the cache for this block. We need to obtain a buffer and
     * this means take a buffer that is ready to use. If all buffers are in use
     * take the least recently used buffer. If there are none then the cache is
     * empty. All the buffers are either queued to be written to disk or with
     * the user. We cannot do much with the buffers with the user however with
     * the modified buffers waiting to be written to disk flush the maximum
     * number transferred in a block to disk. After this all that can be done is
     * to wait for a buffer to return to the cache.
     */
    if (!bd)
    {
      /*
       * Assign new buffer descriptor from the empty list if one is present. If
       * the empty queue is empty get the oldest buffer from LRU list. If the
       * LRU list is empty there are no available buffers so check the modified
       * list.
       */
      if (rtems_chain_is_empty (&pool->ready))
      {
        /*
         * No unused or read-ahead buffers.
         *
         * If this is a read ahead buffer just return. No need to place further
         * pressure on the cache by reading something that may be needed when
         * we have data in the cache that was needed and may still be.
         */
        if (read_ahead)
          return NULL;

        /*
         * Check the LRU list.
         */
        bd = (rtems_bdbuf_buffer *) rtems_chain_get (&pool->lru);

        if (bd)
        {
          /*
           * Remove the buffer from the AVL tree; it is being re-keyed to the
           * new dev/block pair below.
           */
          if (rtems_bdbuf_avl_remove (&pool->tree, bd) != 0)
            rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY);
        }
        else
        {
          /*
           * If there are buffers on the modified list expire the hold timer
           * and wake the swap out task then wait else just go and wait.
           */
          if (!rtems_chain_is_empty (&pool->modified))
          {
            rtems_chain_node* node = rtems_chain_head (&pool->modified);
            uint32_t          write_blocks = 0;

            /*
             * Expire the hold timers of up to max_write_blocks modified
             * buffers so the swap out task writes them promptly.
             */
            node = node->next;
            while ((write_blocks < rtems_bdbuf_configuration.max_write_blocks) &&
                   !rtems_chain_is_tail (&pool->modified, node))
            {
              /* NOTE: this inner bd intentionally shadows the outer bd. */
              rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
              bd->hold_timer = 0;
              write_blocks++;
              node = node->next;
            }

            rtems_bdbuf_wake_swapper ();
          }

          /*
           * Wait for a buffer to be returned to the pool. The buffer will be
           * placed on the LRU list. The pool is unlocked while waiting.
           */
          rtems_bdbuf_wait (pool, &pool->waiting, &pool->wait_waiters);
        }
      }
      else
      {
        bd = (rtems_bdbuf_buffer *) rtems_chain_get (&(pool->ready));

        /* Ready-list buffers must be empty or read-ahead; anything else is
         * a cache consistency failure. */
        if ((bd->state != RTEMS_BDBUF_STATE_EMPTY) &&
            (bd->state != RTEMS_BDBUF_STATE_READ_AHEAD))
          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY);

        /* A read-ahead buffer is still keyed in the AVL tree; remove it
         * before re-keying. */
        if (bd->state == RTEMS_BDBUF_STATE_READ_AHEAD)
        {
          if (rtems_bdbuf_avl_remove (&pool->tree, bd) != 0)
            rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY);
        }
      }

      /*
       * Re-key the obtained buffer to this dev/block pair and insert it into
       * the AVL tree, then hand it straight back to the caller.
       */
      if (bd)
      {
        bd->dev       = device;
        bd->block     = block;
        bd->avl.left  = NULL;
        bd->avl.right = NULL;
        bd->state     = RTEMS_BDBUF_STATE_EMPTY;
        bd->error     = 0;
        bd->waiters   = 0;

        if (rtems_bdbuf_avl_insert (&pool->tree, bd) != 0)
          rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY);

        return bd;
      }
    }
  }
  while (!bd);

  /*
   * If the buffer is for read ahead and it exists in the AVL cache or is being
   * accessed or being transferred then return NULL.
   */
  if (read_ahead)
    return NULL;

  /*
   * Loop waiting for the buffer to enter the cached state. If the buffer is in
   * the access or transfer state then wait until it is not.
   */
  available = false;
  while (!available)
  {
    switch (bd->state)
    {
      case RTEMS_BDBUF_STATE_CACHED:
      case RTEMS_BDBUF_STATE_MODIFIED:
      case RTEMS_BDBUF_STATE_READ_AHEAD:
        available = true;
        break;

      case RTEMS_BDBUF_STATE_ACCESS:
      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
        /* An upper layer holds the buffer; wait on the access semaphore. */
        bd->waiters++;
        rtems_bdbuf_wait (pool, &pool->access, &pool->access_waiters);
        bd->waiters--;
        break;

      case RTEMS_BDBUF_STATE_SYNC:
      case RTEMS_BDBUF_STATE_TRANSFER:
        /* The driver holds the buffer; wait on the transfer semaphore. */
        bd->waiters++;
        rtems_bdbuf_wait (pool, &pool->transfer, &pool->transfer_waiters);
        bd->waiters--;
        break;

      default:
        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY);
    }
  }

  /*
   * Buffer is linked to the LRU, modified, or sync lists. Remove it from there.
   */
  rtems_chain_extract (&bd->link);

  return bd;
}
1340
[3899a537]1341rtems_status_code
1342rtems_bdbuf_get (dev_t                device,
1343                 rtems_blkdev_bnum    block,
1344                 rtems_bdbuf_buffer** bdp)
[e51bd96]1345{
[3899a537]1346  rtems_disk_device*  dd;
1347  rtems_bdbuf_pool*   pool;
1348  rtems_bdbuf_buffer* bd;
1349
1350  /*
1351   * Do not hold the pool lock when obtaining the disk table.
1352   */
1353  dd = rtems_disk_obtain (device);
1354  if (dd == NULL)
1355    return RTEMS_INVALID_ID;
1356
1357  if (block >= dd->size)
1358  {
1359    rtems_disk_release (dd);
1360    return RTEMS_INVALID_NUMBER;
1361  }
1362
1363  pool = rtems_bdbuf_get_pool (dd->phys_dev->pool);
1364 
1365  rtems_bdbuf_lock_pool (pool);
1366
1367#if RTEMS_BDBUF_TRACE
[57aa979]1368  /* Print the block index relative to the physical disk */
1369  rtems_bdbuf_printf ("get: %d (dev = %08x)\n", block + dd->start, device);
[3899a537]1370#endif
1371
[57aa979]1372  bd = rtems_bdbuf_get_buffer (dd->phys_dev, pool, block + dd->start, false);
[3899a537]1373
1374  if (bd->state == RTEMS_BDBUF_STATE_MODIFIED)
1375    bd->state = RTEMS_BDBUF_STATE_ACCESS_MODIFIED;
1376  else
1377    bd->state = RTEMS_BDBUF_STATE_ACCESS;
1378 
1379  rtems_bdbuf_unlock_pool (pool);
1380
[6f162ed]1381  rtems_disk_release(dd);
1382
[3899a537]1383  *bdp = bd;
1384 
1385  return RTEMS_SUCCESSFUL;
[e51bd96]1386}
1387
[c21c850e]1388/**
1389 * Call back handler called by the low level driver when the transfer has
1390 * completed. This function may be invoked from interrupt handler.
[e51bd96]1391 *
[c21c850e]1392 * @param arg Arbitrary argument specified in block device request
1393 *            structure (in this case - pointer to the appropriate
1394 *            block device request structure).
1395 * @param status I/O completion status
1396 * @param error errno error code if status != RTEMS_SUCCESSFUL
[e51bd96]1397 */
1398static void
[3899a537]1399rtems_bdbuf_read_done (void* arg, rtems_status_code status, int error)
[e51bd96]1400{
[3899a537]1401  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
1402
1403  req->error = error;
1404  req->status = status;
1405
1406  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
[e51bd96]1407}
1408
rtems_status_code
rtems_bdbuf_read (dev_t                device,
                  rtems_blkdev_bnum    block,
                  rtems_bdbuf_buffer** bdp)
{
  rtems_disk_device*    dd;
  rtems_bdbuf_pool*     pool;
  rtems_bdbuf_buffer*   bd = NULL;
  uint32_t              read_ahead_count;
  rtems_blkdev_request* req;

  /*
   * @todo This type of request structure is wrong and should be removed.
   * NOTE(review): alloca sizes the request for the worst-case read-ahead
   * count; a large max_read_ahead_blocks grows this stack frame.
   */
#define bdbuf_alloc(size) __builtin_alloca (size)

  req = bdbuf_alloc (sizeof (rtems_blkdev_request) +
                     (sizeof ( rtems_blkdev_sg_buffer) *
                      rtems_bdbuf_configuration.max_read_ahead_blocks));

  /*
   * Do not hold the pool lock when obtaining the disk table.
   */
  dd = rtems_disk_obtain (device);
  if (dd == NULL)
    return RTEMS_INVALID_ID;

  if (block >= dd->size) {
    rtems_disk_release(dd);
    return RTEMS_INVALID_NUMBER;
  }

#if RTEMS_BDBUF_TRACE
  /* Print the block index relative to the physical disk */
  rtems_bdbuf_printf ("read: %d (dev = %08x)\n", block + dd->start, device);
#endif

  req->bufnum = 0;

  /*
   * Read the block plus the required number of blocks ahead. The number of
   * blocks to read ahead is configured by the user and limited by the size of
   * the disk or reaching a read ahead block that is also cached.
   *
   * Limit the blocks read by the size of the disk.
   */
  if ((rtems_bdbuf_configuration.max_read_ahead_blocks + block) < dd->size)
    read_ahead_count = rtems_bdbuf_configuration.max_read_ahead_blocks;
  else
    read_ahead_count = dd->size - block;

  pool = rtems_bdbuf_get_pool (dd->phys_dev->pool);

  rtems_bdbuf_lock_pool (pool);

  while (req->bufnum < read_ahead_count)
  {
    /*
     * Get the buffer for the requested block. If the block is cached then
     * return it. If it is not cached transfer the block from the disk media
     * into memory.
     *
     * We need to clean up any buffers allocated and not passed back to the
     * caller.
     *
     * Only the first block (bufnum == 0) is a blocking request; the rest are
     * read-ahead requests that may return NULL.
     */
    bd = rtems_bdbuf_get_buffer (dd->phys_dev, pool,
                                 block + dd->start + req->bufnum,
                                 req->bufnum == 0 ? false : true);

    /*
     * Read ahead buffer is in the cache or none available. Read what we
     * can.
     */
    if (!bd)
      break;

    /*
     * Is the block we are interested in the cache ?
     */
    if ((bd->state == RTEMS_BDBUF_STATE_CACHED) ||
        (bd->state == RTEMS_BDBUF_STATE_MODIFIED))
      break;

    bd->state = RTEMS_BDBUF_STATE_TRANSFER;
    bd->error = 0;

    /*
     * @todo The use of these req blocks is not a great design. The req is a
     *       struct with a single 'bufs' declared in the req struct and the
     *       others are added in the outer level struct. This relies on the
     *       structs joining as a single array and that assumes the compiler
     *       packs the structs. Why not just place on a list ? The BD has a
     *       node that can be used.
     */
    req->bufs[req->bufnum].user   = bd;
    req->bufs[req->bufnum].block  = bd->block;
    req->bufs[req->bufnum].length = dd->block_size;
    req->bufs[req->bufnum].buffer = bd->buffer;
    req->bufnum++;
  }

  /*
   * Transfer any requested buffers. If the request count is 0 we have found
   * the block in the cache so return it.
   */
  if (req->bufnum)
  {
    /*
     * Unlock the pool. We have the buffer for the block and it will be in the
     * access or transfer state. We may also have a number of read ahead blocks
     * if we need to transfer data. At this point any other threads can gain
     * access to the pool and if they are after any of the buffers we have they
     * will block and be woken when the buffer is returned to the pool.
     *
     * If a transfer is needed the I/O operation will occur with pre-emption
     * enabled and the pool unlocked. This is a change to the previous version
     * of the bdbuf code.
     */
    rtems_event_set out;
    int             result;
    uint32_t        b;

    /*
     * Flush any events.
     */
    rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
                         RTEMS_EVENT_ALL | RTEMS_NO_WAIT,
                         0, &out);

    rtems_bdbuf_unlock_pool (pool);

    req->req = RTEMS_BLKDEV_REQ_READ;
    req->req_done = rtems_bdbuf_read_done;
    req->done_arg = req;
    req->io_task = rtems_task_self ();
    req->status = RTEMS_RESOURCE_IN_USE;
    req->error = 0;

    result = dd->ioctl (dd->phys_dev->dev, RTEMS_BLKIO_REQUEST, req);

    /*
     * Inspection of the DOS FS code shows the result from this function is
     * handled and a buffer must be returned.
     */
    if (result < 0)
    {
      /* The driver rejected the request; report the errno to the caller via
       * the buffer's error field below. */
      req->error = errno;
      req->status = RTEMS_IO_ERROR;
    }
    else
    {
      rtems_status_code sc;

      /* Block until rtems_bdbuf_read_done signals transfer completion. */
      sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
                                RTEMS_EVENT_ALL | RTEMS_WAIT,
                                0, &out);

      if (sc != RTEMS_SUCCESSFUL)
        rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
    }

    rtems_bdbuf_lock_pool (pool);

    /*
     * Release the read-ahead buffers (index 1 onwards); only bufs[0] is
     * handed back to the caller.
     */
    for (b = 1; b < req->bufnum; b++)
    {
      bd = req->bufs[b].user;
      bd->error = req->error;
      bd->state = RTEMS_BDBUF_STATE_READ_AHEAD;
      rtems_bdbuf_release (bd);
    }

    bd = req->bufs[0].user;
  }

  /*
   * The data for this block is cached in the buffer.
   */
  if (bd->state == RTEMS_BDBUF_STATE_MODIFIED)
    bd->state = RTEMS_BDBUF_STATE_ACCESS_MODIFIED;
  else
    bd->state = RTEMS_BDBUF_STATE_ACCESS;

  rtems_bdbuf_unlock_pool (pool);
  rtems_disk_release (dd);

  *bdp = bd;

  return RTEMS_SUCCESSFUL;
}
[e51bd96]1598
[3899a537]1599rtems_status_code
1600rtems_bdbuf_release (rtems_bdbuf_buffer* bd)
[e51bd96]1601{
[3899a537]1602  rtems_bdbuf_pool* pool;
[e51bd96]1603
[3899a537]1604  if (bd == NULL)
1605    return RTEMS_INVALID_ADDRESS;
[048dcd2b]1606
[3899a537]1607  pool = rtems_bdbuf_get_pool (bd->pool);
[e51bd96]1608
[3899a537]1609  rtems_bdbuf_lock_pool (pool);
[e51bd96]1610
[3899a537]1611#if RTEMS_BDBUF_TRACE
1612  rtems_bdbuf_printf ("release: %d\n", bd->block);
1613#endif
1614 
1615  if (bd->state == RTEMS_BDBUF_STATE_ACCESS_MODIFIED)
1616  {
1617    rtems_bdbuf_append_modified (pool, bd);
1618  }
1619  else
1620  {
1621    /*
[c21c850e]1622     * If this is a read ahead buffer place the ready queue. Buffers are taken
1623     * from here first. If we prepend then get from the queue the buffers
1624     * furthermost from the read buffer will be used.
[3899a537]1625     */
1626    if (bd->state == RTEMS_BDBUF_STATE_READ_AHEAD)
1627      rtems_chain_prepend (&pool->ready, &bd->link);
1628    else
[e51bd96]1629    {
[3899a537]1630      bd->state = RTEMS_BDBUF_STATE_CACHED;
1631      rtems_chain_append (&pool->lru, &bd->link);
[e51bd96]1632    }
[3899a537]1633  }
1634 
1635  /*
1636   * If there are threads waiting to access the buffer wake them. Wake any
1637   * waiters if this is the first buffer to placed back onto the queue.
1638   */
1639  if (bd->waiters)
1640    rtems_bdbuf_wake (pool->access, &pool->access_waiters);
1641  else
1642  {
1643    if (bd->state == RTEMS_BDBUF_STATE_READ_AHEAD)
1644    {
1645      if (rtems_chain_has_only_one_node (&pool->ready))
1646        rtems_bdbuf_wake (pool->waiting, &pool->wait_waiters);
1647    }
1648    else
1649    {
1650      if (rtems_chain_has_only_one_node (&pool->lru))
1651        rtems_bdbuf_wake (pool->waiting, &pool->wait_waiters);
1652    }
1653  }
1654 
1655  rtems_bdbuf_unlock_pool (pool);
[e51bd96]1656
[3899a537]1657  return RTEMS_SUCCESSFUL;
1658}
[e51bd96]1659
1660rtems_status_code
[3899a537]1661rtems_bdbuf_release_modified (rtems_bdbuf_buffer* bd)
[e51bd96]1662{
[3899a537]1663  rtems_bdbuf_pool* pool;
1664
1665  if (bd == NULL)
1666    return RTEMS_INVALID_ADDRESS;
[e51bd96]1667
[3899a537]1668  pool = rtems_bdbuf_get_pool (bd->pool);
[048dcd2b]1669
[3899a537]1670  rtems_bdbuf_lock_pool (pool);
[048dcd2b]1671
[3899a537]1672#if RTEMS_BDBUF_TRACE
1673  rtems_bdbuf_printf ("release modified: %d\n", bd->block);
1674#endif
1675
1676  bd->hold_timer = rtems_bdbuf_configuration.swap_block_hold;
1677 
1678  rtems_bdbuf_append_modified (pool, bd);
[048dcd2b]1679
[3899a537]1680  if (bd->waiters)
1681    rtems_bdbuf_wake (pool->access, &pool->access_waiters);
1682 
1683  rtems_bdbuf_unlock_pool (pool);
[048dcd2b]1684
[3899a537]1685  return RTEMS_SUCCESSFUL;
[e51bd96]1686}
1687
1688rtems_status_code
[3899a537]1689rtems_bdbuf_sync (rtems_bdbuf_buffer* bd)
[e51bd96]1690{
[3899a537]1691  rtems_bdbuf_pool* pool;
[4f971343]1692  bool              available;
[3899a537]1693
1694#if RTEMS_BDBUF_TRACE
1695  rtems_bdbuf_printf ("sync: %d\n", bd->block);
1696#endif
1697 
1698  if (bd == NULL)
1699    return RTEMS_INVALID_ADDRESS;
1700
1701  pool = rtems_bdbuf_get_pool (bd->pool);
1702
1703  rtems_bdbuf_lock_pool (pool);
1704
1705  bd->state = RTEMS_BDBUF_STATE_SYNC;
[e51bd96]1706
[3899a537]1707  rtems_chain_append (&pool->sync, &bd->link);
[048dcd2b]1708
[3899a537]1709  rtems_bdbuf_wake_swapper ();
[e51bd96]1710
[4f971343]1711  available = false;
[3899a537]1712  while (!available)
1713  {
1714    switch (bd->state)
[e51bd96]1715    {
[3899a537]1716      case RTEMS_BDBUF_STATE_CACHED:
1717      case RTEMS_BDBUF_STATE_READ_AHEAD:
1718      case RTEMS_BDBUF_STATE_MODIFIED:
1719      case RTEMS_BDBUF_STATE_ACCESS:
1720      case RTEMS_BDBUF_STATE_ACCESS_MODIFIED:
[4f971343]1721        available = true;
[3899a537]1722        break;
1723
1724      case RTEMS_BDBUF_STATE_SYNC:
1725      case RTEMS_BDBUF_STATE_TRANSFER:
1726        bd->waiters++;
1727        rtems_bdbuf_wait (pool, &pool->transfer, &pool->transfer_waiters);
1728        bd->waiters--;
1729        break;
1730
1731      default:
1732        rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_CONSISTENCY);
[e51bd96]1733    }
[3899a537]1734  }
[048dcd2b]1735
[3899a537]1736  rtems_bdbuf_unlock_pool (pool);
1737 
1738  return RTEMS_SUCCESSFUL;
[e51bd96]1739}
1740
1741rtems_status_code
[3899a537]1742rtems_bdbuf_syncdev (dev_t dev)
[e51bd96]1743{
[3899a537]1744  rtems_disk_device*  dd;
1745  rtems_bdbuf_pool*   pool;
1746  rtems_status_code   sc;
1747  rtems_event_set     out;
[e51bd96]1748
[3899a537]1749#if RTEMS_BDBUF_TRACE
1750  rtems_bdbuf_printf ("syncdev: %08x\n", dev);
1751#endif
[e51bd96]1752
[3899a537]1753  /*
1754   * Do not hold the pool lock when obtaining the disk table.
1755   */
1756  dd = rtems_disk_obtain (dev);
1757  if (dd == NULL)
1758    return RTEMS_INVALID_ID;
1759
1760  pool = rtems_bdbuf_get_pool (dd->pool);
1761
1762  /*
1763   * Take the sync lock before locking the pool. Once we have the sync lock
1764   * we can lock the pool. If another thread has the sync lock it will cause
1765   * this thread to block until it owns the sync lock then it can own the
1766   * pool. The sync lock can only be obtained with the pool unlocked.
1767   */
1768 
1769  rtems_bdbuf_lock_sync (pool);
1770  rtems_bdbuf_lock_pool (pool); 
[e51bd96]1771
[c21c850e]1772  /*
1773   * Set the pool to have a sync active for a specific device and let the swap
1774   * out task know the id of the requester to wake when done.
1775   *
1776   * The swap out task will negate the sync active flag when no more buffers
1777   * for the device are held on the modified for sync queues.
1778   */
[4f971343]1779  pool->sync_active    = true;
[3899a537]1780  pool->sync_requester = rtems_task_self ();
1781  pool->sync_device    = dev;
1782 
1783  rtems_bdbuf_wake_swapper ();
1784  rtems_bdbuf_unlock_pool (pool);
1785 
1786  sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
1787                            RTEMS_EVENT_ALL | RTEMS_WAIT,
1788                            0, &out);
[e51bd96]1789
[3899a537]1790  if (sc != RTEMS_SUCCESSFUL)
1791    rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
1792     
1793  rtems_bdbuf_unlock_sync (pool);
1794 
1795  return rtems_disk_release(dd);
[e51bd96]1796}
1797
[c21c850e]1798/**
1799 * Call back handler called by the low level driver when the transfer has
1800 * completed. This function may be invoked from interrupt handler.
[e51bd96]1801 *
[c21c850e]1802 * @param arg Arbitrary argument specified in block device request
1803 *            structure (in this case - pointer to the appropriate
1804 *            block device request structure).
1805 * @param status I/O completion status
1806 * @param error errno error code if status != RTEMS_SUCCESSFUL
[e51bd96]1807 */
[3899a537]1808static void
1809rtems_bdbuf_write_done(void *arg, rtems_status_code status, int error)
1810{
1811  rtems_blkdev_request* req = (rtems_blkdev_request*) arg;
1812
1813  req->error = error;
1814  req->status = status;
1815
1816  rtems_event_send (req->io_task, RTEMS_BDBUF_TRANSFER_SYNC);
1817}
1818
1819/**
 * Process the modified list of buffers. There is a sync or modified list that
 * needs to be handled.
 *
 * @param pid The pool id to process modified buffers on.
 * @param dev The device to handle. If -1 no device is selected so select the
 *            device of the first buffer to be written to disk.
 * @param chain The modified chain to process.
 * @param transfer The chain to append buffers to be written to.
 * @param sync_active If true this is a sync operation so expire all timers.
 * @param update_timers If true update the timers.
 * @param timer_delta If update_timers is true update the timers by this
 *                    amount.
[3899a537]1832 */
1833static void
1834rtems_bdbuf_swapout_modified_processing (rtems_bdpool_id      pid,
1835                                         dev_t*               dev,
1836                                         rtems_chain_control* chain,
1837                                         rtems_chain_control* transfer,
[4f971343]1838                                         bool                 sync_active,
1839                                         bool                 update_timers,
[3899a537]1840                                         uint32_t             timer_delta)
[e51bd96]1841{
[3899a537]1842  if (!rtems_chain_is_empty (chain))
1843  {
1844    rtems_chain_node* node = rtems_chain_head (chain);
1845    node = node->next;
1846
1847    while (!rtems_chain_is_tail (chain, node))
1848    {
1849      rtems_bdbuf_buffer* bd = (rtems_bdbuf_buffer*) node;
1850   
1851      if (bd->pool == pid)
1852      {
1853        /*
[c21c850e]1854         * Check if the buffer's hold timer has reached 0. If a sync is active
1855         * force all the timers to 0.
[3899a537]1856         *
[c21c850e]1857         * @note Lots of sync requests will skew this timer. It should be based
1858         *       on TOD to be accurate. Does it matter ?
[3899a537]1859         */
1860        if (sync_active)
1861          bd->hold_timer = 0;
1862 
1863        if (bd->hold_timer)
1864        {
1865          if (update_timers)
1866          {
1867            if (bd->hold_timer > timer_delta)
1868              bd->hold_timer -= timer_delta;
1869            else
1870              bd->hold_timer = 0;
1871          }
1872
1873          if (bd->hold_timer)
1874          {
1875            node = node->next;
1876            continue;
1877          }
1878        }
1879
1880        /*
1881         * This assumes we can set dev_t to -1 which is just an
[c21c850e]1882         * assumption. Cannot use the transfer list being empty the sync dev
1883         * calls sets the dev to use.
[3899a537]1884         */
[cec5c069]1885        if (*dev == (dev_t)-1)
[3899a537]1886          *dev = bd->dev;
1887
1888        if (bd->dev == *dev)
1889        {
1890          rtems_chain_node* next_node = node->next;
[2eb89ad]1891          rtems_chain_node* tnode = rtems_chain_tail (transfer);
1892   
1893          /*
1894           * The blocks on the transfer list are sorted in block order. This
1895           * means multi-block transfers for drivers that require consecutive
1896           * blocks perform better with sorted blocks and for real disks it may
1897           * help lower head movement.
1898           */
1899
1900          bd->state = RTEMS_BDBUF_STATE_TRANSFER;
1901
[3899a537]1902          rtems_chain_extract (node);
[2eb89ad]1903
1904          tnode = tnode->previous;
1905         
1906          while (node && !rtems_chain_is_head (transfer, tnode))
1907          {
1908            rtems_bdbuf_buffer* tbd = (rtems_bdbuf_buffer*) tnode;
1909
1910            if (bd->block > tbd->block)
1911            {
1912              rtems_chain_insert (tnode, node);
1913              node = NULL;
1914            }
1915            else
1916              tnode = tnode->previous;
1917          }
1918
1919          if (node)
1920            rtems_chain_prepend (transfer, node);
1921         
[3899a537]1922          node = next_node;
1923        }
1924        else
1925        {
1926          node = node->next;
1927        }
1928      }
1929    }
1930  }
1931}
[048dcd2b]1932
/**
 * Process a pool's modified buffers. Check the sync list first then the
 * modified list extracting the buffers suitable to be written to disk. We
 * handle one device at a time. The task level loop will repeat this operation
 * while there are buffers to be written. If the transfer fails place the
 * buffers back on the modified list and try again later. The pool is unlocked
 * while the buffers are being written to disk.
 *
 * @param pid The pool id to process modified buffers on.
 * @param timer_delta If update_timers is true update the hold timers by this
 *                    amount.
 * @param update_timers If true update the hold timers.
 * @param write_req The write request structure. There is only one.
 *
 * @retval true Buffers were written to disk so scan again.
 * @retval false No buffers were written to disk.
 */
static bool
rtems_bdbuf_swapout_pool_processing (rtems_bdpool_id       pid,
                                     unsigned long         timer_delta,
                                     bool                  update_timers,
                                     rtems_blkdev_request* write_req)
{
  rtems_bdbuf_pool*   pool = rtems_bdbuf_get_pool (pid);
  rtems_chain_control transfer;
  dev_t               dev = -1;
  rtems_disk_device*  dd;
  bool                transfered_buffers = true;

  rtems_chain_initialize_empty (&transfer);

  rtems_bdbuf_lock_pool (pool);

  /*
   * When the sync is for a device limit the sync to that device. If the sync
   * is for a buffer handle the devices in the order on the sync list. This
   * means the dev is -1.
   */
  if (pool->sync_active)
    dev = pool->sync_device;

  /*
   * If we have any buffers in the sync queue move them to the transfer
   * list. The first sync buffer will select the device we use.
   */
  rtems_bdbuf_swapout_modified_processing (pid, &dev,
                                           &pool->sync, &transfer,
                                           true, false,
                                           timer_delta);

  /*
   * Process the pool's modified list. Timers are only updated here; the sync
   * queue pass above never touches them.
   */
  rtems_bdbuf_swapout_modified_processing (pid, &dev,
                                           &pool->modified, &transfer,
                                           pool->sync_active,
                                           update_timers,
                                           timer_delta);

  /*
   * We have all the buffers that have been modified for this device so
   * the pool can be unlocked because the state is set to TRANSFER.
   */

  rtems_bdbuf_unlock_pool (pool);

  /*
   * If there are buffers to transfer to the media transfer them.
   */
  if (rtems_chain_is_empty (&transfer))
    transfered_buffers = false;
  else
  {
    /*
     * Obtain the disk device. The pool mutex was released above so this
     * blocking call cannot dead lock with the pool lock.
     */
    dd = rtems_disk_obtain (dev);
    if (dd == NULL)
       transfered_buffers = false;
    else
    {
      /*
       * The last block number used when the driver only supports
       * continuous blocks in a single request.
       */
      uint32_t last_block = 0;

      /*
       * Take as many buffers as configured and pass to the driver. Note, the
       * API to the drivers has the array of buffers and if a chain was passed
       * we could have just passed the list. If the driver API is updated it
       * should be possible to make this change with little effect in this
       * code. The array that is passed is broken in design and should be
       * removed. Merging two members of a struct into the first member is
       * trouble waiting to happen.
       */

      write_req->status = RTEMS_RESOURCE_IN_USE;
      write_req->error = 0;
      write_req->bufnum = 0;

      while (!rtems_chain_is_empty (&transfer))
      {
        rtems_bdbuf_buffer* bd =
          (rtems_bdbuf_buffer*) rtems_chain_get (&transfer);

        bool write = false;

        /*
         * If the device only accepts sequential buffers and this is not the
         * first buffer (the first is always sequential), and the buffer is
         * not sequential, then put the buffer back on the transfer chain and
         * write the committed buffers.
         */

        if ((dd->capabilities & RTEMS_BLKDEV_CAP_MULTISECTOR_CONT) &&
            write_req->bufnum &&
            (bd->block != (last_block + 1)))
        {
          rtems_chain_prepend (&transfer, &bd->link);
          write = true;
        }
        else
        {
          write_req->bufs[write_req->bufnum].user   = bd;
          write_req->bufs[write_req->bufnum].block  = bd->block;
          write_req->bufs[write_req->bufnum].length = dd->block_size;
          write_req->bufs[write_req->bufnum].buffer = bd->buffer;
          write_req->bufnum++;
          last_block = bd->block;
        }

        /*
         * Perform the transfer if there are no more buffers, or the transfer
         * size has reached the configured max. value.
         */

        if (rtems_chain_is_empty (&transfer) ||
            (write_req->bufnum >= rtems_bdbuf_configuration.max_write_blocks))
          write = true;

        if (write)
        {
          int result;
          uint32_t b;

          /*
           * Perform the transfer. No pool locks, no preemption, only the disk
           * device is being held.
           */
          result = dd->ioctl (dd->phys_dev->dev,
                              RTEMS_BLKIO_REQUEST, write_req);

          if (result < 0)
          {
            rtems_bdbuf_lock_pool (pool);

            for (b = 0; b < write_req->bufnum; b++)
            {
              bd = write_req->bufs[b].user;
              bd->state  = RTEMS_BDBUF_STATE_MODIFIED;
              bd->error = errno;

              /*
               * Place back on the pool's modified queue and try again.
               *
               * @warning Not sure this is the best option but I do not know
               *          what else can be done.
               */
              rtems_chain_append (&pool->modified, &bd->link);
            }
          }
          else
          {
            /*
             * The driver request completion routine sends this event; wait
             * here until the whole request has been processed.
             */
            rtems_status_code sc = 0;
            rtems_event_set   out;

            sc = rtems_event_receive (RTEMS_BDBUF_TRANSFER_SYNC,
                                      RTEMS_EVENT_ALL | RTEMS_WAIT,
                                      0, &out);

            if (sc != RTEMS_SUCCESSFUL)
              rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);

            rtems_bdbuf_lock_pool (pool);

            for (b = 0; b < write_req->bufnum; b++)
            {
              bd = write_req->bufs[b].user;
              bd->state = RTEMS_BDBUF_STATE_CACHED;
              bd->error = 0;

              rtems_chain_append (&pool->lru, &bd->link);

              /*
               * Wake tasks waiting on this buffer's transfer, otherwise wake
               * a task waiting for any free buffer when the LRU list gains
               * its first node.
               */
              if (bd->waiters)
                rtems_bdbuf_wake (pool->transfer, &pool->transfer_waiters);
              else
              {
                if (rtems_chain_has_only_one_node (&pool->lru))
                  rtems_bdbuf_wake (pool->waiting, &pool->wait_waiters);
              }
            }
          }

          rtems_bdbuf_unlock_pool (pool);

          write_req->status = RTEMS_RESOURCE_IN_USE;
          write_req->error = 0;
          write_req->bufnum = 0;
        }
      }

      rtems_disk_release (dd);
    }
  }

  /*
   * A sync is complete once a scan moves no more buffers; notify the
   * requesting task, if any.
   */
  if (pool->sync_active && !  transfered_buffers)
  {
    rtems_id sync_requester = pool->sync_requester;
    pool->sync_active = false;
    pool->sync_requester = 0;
    if (sync_requester)
      rtems_event_send (sync_requester, RTEMS_BDBUF_TRANSFER_SYNC);
  }
 
  return  transfered_buffers;
}
2161
[3899a537]2162/**
[c21c850e]2163 * Body of task which takes care on flushing modified buffers to the disk.
2164 *
2165 * @param arg The task argument which is the context.
[e51bd96]2166 */
[8b96149]2167static rtems_task
[3899a537]2168rtems_bdbuf_swapout_task (rtems_task_argument arg)
[e51bd96]2169{
[3899a537]2170  rtems_bdbuf_context*  context = (rtems_bdbuf_context*) arg;
2171  rtems_blkdev_request* write_req;
2172  uint32_t              period_in_ticks;
2173  const uint32_t        period_in_msecs = rtems_bdbuf_configuration.swapout_period;
2174  uint32_t              timer_delta;
2175  rtems_status_code     sc;
2176
2177  /*
2178   * @note chrisj The rtems_blkdev_request and the array at the end is a hack.
2179   * I am disappointment at finding code like this in RTEMS. The request should
2180   * have been a rtems_chain_control. Simple, fast and less storage as the node
2181   * is already part of the buffer structure.
2182   */
2183  write_req =
2184    malloc (sizeof (rtems_blkdev_request) +
2185            (rtems_bdbuf_configuration.max_write_blocks *
2186             sizeof (rtems_blkdev_sg_buffer)));
2187
2188  if (!write_req)
2189    rtems_fatal_error_occurred (RTEMS_BLKDEV_FATAL_BDBUF_SO_NOMEM);
2190
2191  write_req->req = RTEMS_BLKDEV_REQ_WRITE;
2192  write_req->req_done = rtems_bdbuf_write_done;
2193  write_req->done_arg = write_req;
2194  write_req->io_task = rtems_task_self ();
2195
[26fb4aa]2196  period_in_ticks = RTEMS_MICROSECONDS_TO_TICKS (period_in_msecs * 1000);
[3899a537]2197
2198  /*
[c21c850e]2199   * This is temporary. Needs to be changed to use the real time clock.
[3899a537]2200   */
2201  timer_delta = period_in_msecs;
2202
2203  while (context->swapout_enabled)
2204  {
2205    rtems_event_set out;
[e51bd96]2206
[0ebfac19]2207    /*
[3899a537]2208     * Only update the timers once in the processing cycle.
2209     */
[4f971343]2210    bool update_timers = true;
[3899a537]2211   
2212    /*
[c21c850e]2213     * If we write buffers to any disk perform a check again. We only write a
2214     * single device at a time and a pool may have more than one devices
2215     * buffers modified waiting to be written.
[0ebfac19]2216     */
[4f971343]2217    bool transfered_buffers;
[3899a537]2218
2219    do
[e51bd96]2220    {
[3899a537]2221      rtems_bdpool_id pid;
2222   
[4f971343]2223      transfered_buffers = false;
[3899a537]2224
2225      /*
2226       * Loop over each pool extacting all the buffers we find for a specific
2227       * device. The device is the first one we find on a modified list of a
2228       * pool. Process the sync queue of buffers first.
2229       */
2230      for (pid = 0; pid < context->npools; pid++)
2231      {
2232        if (rtems_bdbuf_swapout_pool_processing (pid,
2233                                                 timer_delta,
2234                                                 update_timers,
2235                                                 write_req))
2236        {
[4f971343]2237          transfered_buffers = true;
[e51bd96]2238        }
[3899a537]2239      }
2240
2241      /*
2242       * Only update the timers once.
2243       */
[4f971343]2244      update_timers = false;
[e51bd96]2245    }
[3899a537]2246    while (transfered_buffers);
2247
2248    sc = rtems_event_receive (RTEMS_BDBUF_SWAPOUT_SYNC,
2249                              RTEMS_EVENT_ALL | RTEMS_WAIT,
2250                              period_in_ticks,
2251                              &out);
2252
2253    if ((sc != RTEMS_SUCCESSFUL) && (sc != RTEMS_TIMEOUT))
2254      rtems_fatal_error_occurred (BLKDEV_FATAL_BDBUF_SWAPOUT_RE);
2255  }
2256
2257  free (write_req);
2258
2259  rtems_task_delete (RTEMS_SELF);
[e51bd96]2260}
2261
2262rtems_status_code
[cec5c069]2263rtems_bdbuf_find_pool (uint32_t block_size, rtems_bdpool_id *pool)
[e51bd96]2264{
[3899a537]2265  rtems_bdbuf_pool* p;
2266  rtems_bdpool_id   i;
2267  rtems_bdpool_id   curid = -1;
[4f971343]2268  bool              found = false;
[cec5c069]2269  uint32_t          cursize = UINT_MAX;
[3899a537]2270  int               j;
2271
2272  for (j = block_size; (j != 0) && ((j & 1) == 0); j >>= 1);
2273  if (j != 1)
2274    return RTEMS_INVALID_SIZE;
2275
2276  for (i = 0; i < rtems_bdbuf_ctx.npools; i++)
2277  {
2278    p = rtems_bdbuf_get_pool (i);
2279    if ((p->blksize >= block_size) &&
2280        (p->blksize < cursize))
[e51bd96]2281    {
[3899a537]2282      curid = i;
2283      cursize = p->blksize;
[4f971343]2284      found = true;
[e51bd96]2285    }
[3899a537]2286  }
[048dcd2b]2287
[3899a537]2288  if (found)
2289  {
2290    if (pool != NULL)
2291      *pool = curid;
2292    return RTEMS_SUCCESSFUL;
2293  }
2294  else
2295  {
2296    return RTEMS_NOT_DEFINED;
2297  }
[e51bd96]2298}
2299
[57aa979]2300rtems_status_code rtems_bdbuf_get_pool_info(
2301  rtems_bdpool_id pool,
2302  uint32_t *block_size,
2303  uint32_t *blocks
2304)
[e51bd96]2305{
[3899a537]2306  if (pool >= rtems_bdbuf_ctx.npools)
2307    return RTEMS_INVALID_NUMBER;
[048dcd2b]2308
[3899a537]2309  if (block_size != NULL)
2310  {
2311    *block_size = rtems_bdbuf_ctx.pool[pool].blksize;
2312  }
[048dcd2b]2313
[3899a537]2314  if (blocks != NULL)
2315  {
2316    *blocks = rtems_bdbuf_ctx.pool[pool].nblks;
2317  }
[048dcd2b]2318
[3899a537]2319  return RTEMS_SUCCESSFUL;
[e51bd96]2320}
Note: See TracBrowser for help on using the repository browser.