source: rtems-libbsd/freebsd/sys/arm/at91/at91_mci.c @ 854427b

55-freebsd-126-freebsd-12
Last change on this file since 854427b was 9569b39, checked in by Christian Mauderer <Christian.Mauderer@…>, on 11/17/17 at 13:23:55

at91_mci: Fix for 32k buffer.

  • Property mode set to 100644
File size: 49.3 KB
Line 
1#include <machine/rtems-bsd-kernel-space.h>
2
3/*-
4 * Copyright (c) 2006 Bernd Walter.  All rights reserved.
5 * Copyright (c) 2006 M. Warner Losh.  All rights reserved.
6 * Copyright (c) 2010 Greg Ansley.  All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 *    notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 *    notice, this list of conditions and the following disclaimer in the
15 *    documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED.  IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#include <rtems/bsd/local/opt_platform.h>
31
32#include <sys/cdefs.h>
33__FBSDID("$FreeBSD$");
34
35#include <sys/param.h>
36#include <sys/systm.h>
37#include <sys/bus.h>
38#include <sys/endian.h>
39#include <sys/kernel.h>
40#include <sys/lock.h>
41#include <sys/malloc.h>
42#include <sys/module.h>
43#include <sys/mutex.h>
44#include <rtems/bsd/sys/resource.h>
45#include <sys/rman.h>
46#include <sys/sysctl.h>
47
48#include <machine/bus.h>
49#include <machine/resource.h>
50#include <machine/intr.h>
51
52#include <arm/at91/at91var.h>
53#include <arm/at91/at91_mcireg.h>
54#include <arm/at91/at91_pdcreg.h>
55
56#include <dev/mmc/bridge.h>
57#include <dev/mmc/mmcbrvar.h>
58
59#ifdef FDT
60#include <dev/ofw/ofw_bus.h>
61#include <dev/ofw/ofw_bus_subr.h>
62#endif
63
64#include <rtems/bsd/local/mmcbr_if.h>
65
66#include <rtems/bsd/local/opt_at91.h>
67
68#ifdef __rtems__
69#include <bsp.h>
70#endif /* __rtems__ */
71#if defined(__rtems) && defined(LIBBSP_ARM_ATSAM_BSP_H)
72#ifdef __rtems__
73#include <libchip/chip.h>
74
75#define AT91_MCI_HAS_4WIRE 1
76
77uint32_t at91_master_clock = BOARD_MCK;
78
79static sXdmad *pXdmad = &XDMAD_Instance;
80#endif /* __rtems__ */
81/*
82 * About running the MCI bus above 25MHz
83 *
84 * Historically, the MCI bus has been run at 30MHz on systems with a 60MHz
85 * master clock, in part due to a bug in dev/mmc.c making always request
86 * 30MHz, and in part over clocking the bus because 15MHz was too slow.
87 * Fixing that bug causes the mmc driver to request a 25MHz clock (as it
88 * should) and the logic in at91_mci_update_ios() picks the highest speed that
89 * doesn't exceed that limit.  With a 60MHz MCK that would be 15MHz, and
90 * that's a real performance buzzkill when you've been getting away with 30MHz
91 * all along.
92 *
93 * By defining AT91_MCI_ALLOW_OVERCLOCK (or setting the allow_overclock=1
94 * device hint or sysctl) you can enable logic in at91_mci_update_ios() to
 * overclock the SD bus a little by running it at MCK / 2 when the requested
96 * speed is 25MHz and the next highest speed is 15MHz or less.  This appears
97 * to work on virtually all SD cards, since it is what this driver has been
98 * doing prior to the introduction of this option, where the overclocking vs
99 * underclocking decision was automatically "overclock".  Modern SD cards can
100 * run at 45mhz/1-bit in standard mode (high speed mode enable commands not
101 * sent) without problems.
102 *
103 * Speaking of high-speed mode, the rm9200 manual says the MCI device supports
104 * the SD v1.0 specification and can run up to 50MHz.  This is interesting in
105 * that the SD v1.0 spec caps the speed at 25MHz; high speed mode was added in
106 * the v1.10 spec.  Furthermore, high speed mode doesn't just crank up the
107 * clock, it alters the signal timing.  The rm9200 MCI device doesn't support
108 * these altered timings.  So while speeds over 25MHz may work, they only work
109 * in what the SD spec calls "default" speed mode, and it amounts to violating
110 * the spec by overclocking the bus.
111 *
 * If you also enable 4-wire mode it's possible transfers faster than 25MHz
 * will fail.  On the AT91RM9200, due to bugs in the bus contention logic,
 * transfers will also fail if you have the USB host device and OHCI driver
 * enabled.  Even underclocking to 15MHz, intermittent overrun and underrun
 * errors occur.
116 * Note that you don't even need to have usb devices attached to the system,
117 * the errors begin to occur as soon as the OHCI driver sets the register bit
118 * to enable periodic transfers.  It appears (based on brief investigation)
119 * that the usb host controller uses so much ASB bandwidth that sometimes the
120 * DMA for MCI transfers doesn't get a bus grant in time and data gets
121 * dropped.  Adding even a modicum of network activity changes the symptom
 * from intermittent to very frequent.  Members of the AT91SAM9 family have
123 * corrected this problem, or are at least better about their use of the bus.
124 */
125#ifndef AT91_MCI_ALLOW_OVERCLOCK
126#define AT91_MCI_ALLOW_OVERCLOCK 1
127#endif
128
129/*
130 * Allocate 2 bounce buffers we'll use to endian-swap the data due to the rm9200
131 * erratum.  We use a pair of buffers because when reading that lets us begin
132 * endian-swapping the data in the first buffer while the DMA is reading into
133 * the second buffer.  (We can't use the same trick for writing because we might
134 * not get all the data in the 2nd buffer swapped before the hardware needs it;
135 * dealing with that would add complexity to the driver.)
136 *
137 * The buffers are sized at 16K each due to the way the busdma cache sync
138 * operations work on arm.  A dcache_inv_range() operation on a range larger
139 * than 16K gets turned into a dcache_wbinv_all().  That needlessly flushes the
140 * entire data cache, impacting overall system performance.
141 */
142#define BBCOUNT     2
143#ifndef __rtems__
144#define BBSIZE      (16*1024)
145#define MAX_BLOCKS  ((BBSIZE*BBCOUNT)/512)
146#else /* __rtems__ */
147#define BBSIZE      (32*1024)
148#define MAX_BLOCKS  ((BBSIZE)/512)
149/* FIXME: It would be better to split the DMA up in that case like in the
150 * original driver. But that would need some rework. */
151#endif /* __rtems__ */
152
153static int mci_debug;
154
155struct at91_mci_softc {
156        void *intrhand;                 /* Interrupt handle */
157        device_t dev;
158        int sc_cap;
159#define CAP_HAS_4WIRE           1       /* Has 4 wire bus */
160#define CAP_NEEDS_BYTESWAP      2       /* broken hardware needing bounce */
161#define CAP_MCI1_REV2XX         4       /* MCI 1 rev 2.x */
162        int flags;
163#define PENDING_CMD     0x01
164#define PENDING_STOP    0x02
165#define CMD_MULTIREAD   0x10
166#define CMD_MULTIWRITE  0x20
167        int has_4wire;
168        int allow_overclock;
169        struct resource *irq_res;       /* IRQ resource */
170        struct resource *mem_res;       /* Memory resource */
171        struct mtx sc_mtx;
172        bus_dma_tag_t dmatag;
173        struct mmc_host host;
174        int bus_busy;
175        struct mmc_request *req;
176        struct mmc_command *curcmd;
177        bus_dmamap_t bbuf_map[BBCOUNT];
178        char      *  bbuf_vaddr[BBCOUNT]; /* bounce bufs in KVA space */
179        uint32_t     bbuf_len[BBCOUNT];   /* len currently queued for bounce buf */
180        uint32_t     bbuf_curidx;         /* which bbuf is the active DMA buffer */
181        uint32_t     xfer_offset;         /* offset so far into caller's buf */
182#ifdef __rtems__
183        uint32_t xdma_tx_channel;
184        uint32_t xdma_rx_channel;
185        uint8_t xdma_tx_perid;
186        uint8_t xdma_rx_perid;
187        sXdmadCfg xdma_tx_cfg;
188        sXdmadCfg xdma_rx_cfg;
189#endif /* __rtems__ */
190};
191
192/* bus entry points */
193static int at91_mci_probe(device_t dev);
194static int at91_mci_attach(device_t dev);
195static int at91_mci_detach(device_t dev);
196static void at91_mci_intr(void *);
197
198/* helper routines */
199static int at91_mci_activate(device_t dev);
200static void at91_mci_deactivate(device_t dev);
201static int at91_mci_is_mci1rev2xx(void);
202#ifdef __rtems__
203static void at91_mci_read_done(struct at91_mci_softc *sc, uint32_t sr);
204static void at91_mci_write_done(struct at91_mci_softc *sc, uint32_t sr);
205#endif /* __rtems__ */
206
207#define AT91_MCI_LOCK(_sc)              mtx_lock(&(_sc)->sc_mtx)
208#define AT91_MCI_UNLOCK(_sc)            mtx_unlock(&(_sc)->sc_mtx)
209#define AT91_MCI_LOCK_INIT(_sc) \
210        mtx_init(&_sc->sc_mtx, device_get_nameunit(_sc->dev), \
211            "mci", MTX_DEF)
212#define AT91_MCI_LOCK_DESTROY(_sc)      mtx_destroy(&_sc->sc_mtx);
213#define AT91_MCI_ASSERT_LOCKED(_sc)     mtx_assert(&_sc->sc_mtx, MA_OWNED);
214#define AT91_MCI_ASSERT_UNLOCKED(_sc) mtx_assert(&_sc->sc_mtx, MA_NOTOWNED);
215
216static inline uint32_t
217RD4(struct at91_mci_softc *sc, bus_size_t off)
218{
219        return (bus_read_4(sc->mem_res, off));
220}
221
222static inline void
223WR4(struct at91_mci_softc *sc, bus_size_t off, uint32_t val)
224{
225        bus_write_4(sc->mem_res, off, val);
226}
227
228static void
229at91_bswap_buf(struct at91_mci_softc *sc, void * dptr, void * sptr, uint32_t memsize)
230{
231        uint32_t * dst = (uint32_t *)dptr;
232        uint32_t * src = (uint32_t *)sptr;
233        uint32_t   i;
234
235        /*
236         * If the hardware doesn't need byte-swapping, let bcopy() do the
237         * work.  Use bounce buffer even if we don't need byteswap, since
238         * buffer may straddle a page boundary, and we don't handle
239         * multi-segment transfers in hardware.  Seen from 'bsdlabel -w' which
240         * uses raw geom access to the volume.  Greg Ansley (gja (at)
241         * ansley.com)
242         */
243        if (!(sc->sc_cap & CAP_NEEDS_BYTESWAP)) {
244                memcpy(dptr, sptr, memsize);
245                return;
246        }
247
248        /*
249         * Nice performance boost for slightly unrolling this loop.
250         * (But very little extra boost for further unrolling it.)
251         */
252        for (i = 0; i < memsize; i += 16) {
253                *dst++ = bswap32(*src++);
254                *dst++ = bswap32(*src++);
255                *dst++ = bswap32(*src++);
256                *dst++ = bswap32(*src++);
257        }
258
259        /* Mop up the last 1-3 words, if any. */
260        for (i = 0; i < (memsize & 0x0F); i += 4) {
261                *dst++ = bswap32(*src++);
262        }
263}
264
265static void
266at91_mci_getaddr(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
267{
268        if (error != 0)
269                return;
270        *(bus_addr_t *)arg = segs[0].ds_addr;
271}
272
/*
 * Stop all DMA activity for this controller.
 *
 * On classic AT91 parts this disables and clears the PDC (Peripheral DMA
 * Controller) channels attached to the MCI.  On the RTEMS/SAMV71 build the
 * transfers run through the central XDMAC instead, so the two XDMA channels
 * are stopped and the MCI DMA register is cleared.
 */
static void
at91_mci_pdc_disable(struct at91_mci_softc *sc)
{
#ifndef __rtems__
	/* Disable both PDC directions, then zero all pointer/counter regs. */
	WR4(sc, PDC_PTCR, PDC_PTCR_TXTDIS | PDC_PTCR_RXTDIS);
	WR4(sc, PDC_RPR, 0);
	WR4(sc, PDC_RCR, 0);
	WR4(sc, PDC_RNPR, 0);
	WR4(sc, PDC_RNCR, 0);
	WR4(sc, PDC_TPR, 0);
	WR4(sc, PDC_TCR, 0);
	WR4(sc, PDC_TNPR, 0);
	WR4(sc, PDC_TNCR, 0);
#else /* __rtems__ */
	/* On SAMV71 there is no PDC but a DMAC */
	XDMAD_StopTransfer(pXdmad, sc->xdma_rx_channel);
	XDMAD_StopTransfer(pXdmad, sc->xdma_tx_channel);
	WR4(sc, MCI_DMA, 0);
#endif /* __rtems__ */
}
293
/*
 * Reset the controller, then restore most of the current state.
 *
 * This is called after detecting an error.  It's also called after stopping a
 * multi-block write, to un-wedge the device so that it will handle the NOTBUSY
 * signal correctly.  See comments in at91_mci_stop_done() for more details.
 */
static void at91_mci_reset(struct at91_mci_softc *sc)
{
	uint32_t mr;
	uint32_t sdcr;
	uint32_t dtor;
	uint32_t imr;

	/* Quiesce DMA before resetting the controller. */
	at91_mci_pdc_disable(sc);

	/* save current state */

	imr  = RD4(sc, MCI_IMR);
#ifndef __rtems__
	/* Keep only the low 15 mode bits; this drops MCI_MR_PDCMODE and
	 * above, which is presumably re-established by the transfer setup
	 * code (at91_mci_init sets PDCMODE on this path) -- see there. */
	mr   = RD4(sc, MCI_MR) & 0x7fff;
#else /* __rtems__ */
	mr   = RD4(sc, MCI_MR);
#endif /* __rtems__ */
	sdcr = RD4(sc, MCI_SDCR);
	dtor = RD4(sc, MCI_DTOR);

	/* reset the controller: mask all interrupts, software reset */

	WR4(sc, MCI_IDR, 0xffffffff);
	WR4(sc, MCI_CR, MCI_CR_MCIDIS | MCI_CR_SWRST);

	/* restore state: re-enable, then rewrite mode/slot/timeout/irq mask */

	WR4(sc, MCI_CR, MCI_CR_MCIEN|MCI_CR_PWSEN);
	WR4(sc, MCI_MR, mr);
	WR4(sc, MCI_SDCR, sdcr);
	WR4(sc, MCI_DTOR, dtor);
	WR4(sc, MCI_IER, imr);

	/*
	 * Make sure sdio interrupts will fire.  Not sure why reading
	 * SR ensures that, but this is in the linux driver.
	 */

	RD4(sc, MCI_SR);
}
341
/*
 * Bring the controller to a known-good initial state: hold it in reset,
 * mask all interrupts, program the data timeout, mode register (slow
 * initial clock) and slot selection, then enable it with power-save.
 */
static void
at91_mci_init(device_t dev)
{
	struct at91_mci_softc *sc = device_get_softc(dev);
	uint32_t val;

	WR4(sc, MCI_CR, MCI_CR_MCIDIS | MCI_CR_SWRST); /* device into reset */
	WR4(sc, MCI_IDR, 0xffffffff);		/* Turn off interrupts */
	WR4(sc, MCI_DTOR, MCI_DTOR_DTOMUL_1M | 1); /* data timeout = 1 * 1M clocks */
#ifndef __rtems__
	val = MCI_MR_PDCMODE;			/* transfers go through the PDC */
#else /* __rtems__ */
	/* SAMV71 build uses the XDMAC instead of PDC; enable read/write
	 * proof so the internal FIFO stalls the card clock on over/underrun. */
	val = 0;
	val |= MCI_MR_RDPROOF | MCI_MR_WRPROOF;
#endif /* __rtems__ */
	val |= 0x34a;				/* PWSDIV = 3; CLKDIV = 74 */
//	if (sc->sc_cap & CAP_MCI1_REV2XX)
//		val |= MCI_MR_RDPROOF | MCI_MR_WRPROOF;
	WR4(sc, MCI_MR, val);
#ifndef  AT91_MCI_SLOT_B
	WR4(sc, MCI_SDCR, 0);			/* SLOT A, 1 bit bus */
#else
	/*
	 * XXX Really should add second "unit" but nobody we know of is
	 * using a two-slot card. XXX
	 */
	WR4(sc, MCI_SDCR, 1);			/* SLOT B, 1 bit bus */
#endif
	/*
	 * Enable controller, including power-save.  The slower clock
	 * of the power-save mode is only in effect when there is no
	 * transfer in progress, so it can be left in this mode all
	 * the time.
	 */
	WR4(sc, MCI_CR, MCI_CR_MCIEN|MCI_CR_PWSEN);
}
378
/*
 * Quiesce the controller: mask every interrupt, stop any DMA in flight,
 * and leave the device disabled and held in software reset.
 */
static void
at91_mci_fini(device_t dev)
{
	struct at91_mci_softc *sc = device_get_softc(dev);

	WR4(sc, MCI_IDR, 0xffffffff);		/* Turn off interrupts */
	at91_mci_pdc_disable(sc);
	WR4(sc, MCI_CR, MCI_CR_MCIDIS | MCI_CR_SWRST); /* device into reset */
}
388
389static int
390at91_mci_probe(device_t dev)
391{
392#ifdef FDT
393        if (!ofw_bus_is_compatible(dev, "atmel,hsmci"))
394                return (ENXIO);
395#endif
396        device_set_desc(dev, "MCI mmc/sd host bridge");
397        return (0);
398}
399
400static int
401at91_mci_attach(device_t dev)
402{
403        struct at91_mci_softc *sc = device_get_softc(dev);
404        struct sysctl_ctx_list *sctx;
405        struct sysctl_oid *soid;
406        device_t child;
407        int err, i;
408
409#ifdef __rtems__
410#ifdef LIBBSP_ARM_ATSAM_BSP_H
411        PMC_EnablePeripheral(ID_HSMCI);
412        sc->xdma_tx_channel = XDMAD_ALLOC_FAILED;
413        sc->xdma_rx_channel = XDMAD_ALLOC_FAILED;
414#endif /* LIBBSP_ARM_ATSAM_BSP_H */
415#endif /* __rtems__ */
416        sctx = device_get_sysctl_ctx(dev);
417        soid = device_get_sysctl_tree(dev);
418
419        sc->dev = dev;
420        sc->sc_cap = 0;
421#ifndef __rtems__
422        if (at91_is_rm92())
423                sc->sc_cap |= CAP_NEEDS_BYTESWAP;
424#endif /* __rtems__ */
425        /*
426         * MCI1 Rev 2 controllers need some workarounds, flag if so.
427         */
428        if (at91_mci_is_mci1rev2xx())
429                sc->sc_cap |= CAP_MCI1_REV2XX;
430
431        err = at91_mci_activate(dev);
432        if (err)
433                goto out;
434
435#ifdef __rtems__
436        eXdmadRC rc;
437
438        /* Prepare some configurations so they don't have to be fetched on every
439         * setup */
440        sc->xdma_rx_perid = XDMAIF_Get_ChannelNumber(ID_HSMCI,
441            XDMAD_TRANSFER_RX);
442        sc->xdma_tx_perid = XDMAIF_Get_ChannelNumber(ID_HSMCI,
443            XDMAD_TRANSFER_TX);
444        memset(&sc->xdma_rx_cfg, 0, sizeof(sc->xdma_rx_cfg));
445        sc->xdma_rx_cfg.mbr_cfg = XDMAC_CC_TYPE_PER_TRAN |
446            XDMAC_CC_MBSIZE_SINGLE | XDMAC_CC_DSYNC_PER2MEM |
447            XDMAC_CC_SWREQ_HWR_CONNECTED | XDMAC_CC_MEMSET_NORMAL_MODE |
448            XDMAC_CC_CSIZE_CHK_1 | XDMAC_CC_DWIDTH_WORD |
449            XDMAC_CC_SIF_AHB_IF1 | XDMAC_CC_DIF_AHB_IF1 |
450            XDMAC_CC_SAM_FIXED_AM | XDMAC_CC_DAM_INCREMENTED_AM |
451            XDMAC_CC_PERID(
452                XDMAIF_Get_ChannelNumber(ID_HSMCI,XDMAD_TRANSFER_RX));
453        memset(&sc->xdma_tx_cfg, 0, sizeof(sc->xdma_tx_cfg));
454        sc->xdma_tx_cfg.mbr_cfg = XDMAC_CC_TYPE_PER_TRAN |
455            XDMAC_CC_MBSIZE_SINGLE | XDMAC_CC_DSYNC_MEM2PER |
456            XDMAC_CC_SWREQ_HWR_CONNECTED | XDMAC_CC_MEMSET_NORMAL_MODE |
457            XDMAC_CC_CSIZE_CHK_1 | XDMAC_CC_DWIDTH_WORD |
458            XDMAC_CC_SIF_AHB_IF1 | XDMAC_CC_DIF_AHB_IF1 |
459            XDMAC_CC_SAM_INCREMENTED_AM | XDMAC_CC_DAM_FIXED_AM |
460            XDMAC_CC_PERID(
461                XDMAIF_Get_ChannelNumber(ID_HSMCI,XDMAD_TRANSFER_TX));
462
463        sc->xdma_tx_channel = XDMAD_AllocateChannel(pXdmad,
464            XDMAD_TRANSFER_MEMORY, ID_HSMCI);
465        if (sc->xdma_tx_channel == XDMAD_ALLOC_FAILED)
466                goto out;
467
468        /* FIXME: The two DMA channels are not really necessary for the driver.
469         * But the XDMAD interface does not allow to allocate one and use it
470         * into two directions. The current (2017-07-11) implementation of
471         * the XDMAD interface should work with it. So we might could try it. */
472        sc->xdma_rx_channel = XDMAD_AllocateChannel(pXdmad, ID_HSMCI,
473            XDMAD_TRANSFER_MEMORY);
474        if (sc->xdma_rx_channel == XDMAD_ALLOC_FAILED)
475                goto out;
476
477        rc = XDMAD_PrepareChannel(pXdmad, sc->xdma_rx_channel);
478        if (rc != XDMAD_OK)
479                goto out;
480
481        rc = XDMAD_PrepareChannel(pXdmad, sc->xdma_tx_channel);
482        if (rc != XDMAD_OK)
483                goto out;
484
485#endif /* __rtems__ */
486        AT91_MCI_LOCK_INIT(sc);
487
488        at91_mci_fini(dev);
489        at91_mci_init(dev);
490
491        /*
492         * Allocate DMA tags and maps and bounce buffers.
493         *
494         * The parms in the tag_create call cause the dmamem_alloc call to
495         * create each bounce buffer as a single contiguous buffer of BBSIZE
496         * bytes aligned to a 4096 byte boundary.
497         *
498         * Do not use DMA_COHERENT for these buffers because that maps the
499         * memory as non-cachable, which prevents cache line burst fills/writes,
500         * which is something we need since we're trying to overlap the
501         * byte-swapping with the DMA operations.
502         */
503        err = bus_dma_tag_create(bus_get_dma_tag(dev), 4096, 0,
504            BUS_SPACE_MAXADDR_32BIT, BUS_SPACE_MAXADDR, NULL, NULL,
505            BBSIZE, 1, BBSIZE, 0, NULL, NULL, &sc->dmatag);
506        if (err != 0)
507                goto out;
508
509        for (i = 0; i < BBCOUNT; ++i) {
510                err = bus_dmamem_alloc(sc->dmatag, (void **)&sc->bbuf_vaddr[i],
511                    BUS_DMA_NOWAIT, &sc->bbuf_map[i]);
512                if (err != 0)
513                        goto out;
514        }
515
516        /*
517         * Activate the interrupt
518         */
519        err = bus_setup_intr(dev, sc->irq_res, INTR_TYPE_MISC | INTR_MPSAFE,
520            NULL, at91_mci_intr, sc, &sc->intrhand);
521        if (err) {
522                AT91_MCI_LOCK_DESTROY(sc);
523                goto out;
524        }
525
526        /*
527         * Allow 4-wire to be initially set via #define.
528         * Allow a device hint to override that.
529         * Allow a sysctl to override that.
530         */
531#if defined(AT91_MCI_HAS_4WIRE) && AT91_MCI_HAS_4WIRE != 0
532        sc->has_4wire = 1;
533#endif
534        resource_int_value(device_get_name(dev), device_get_unit(dev),
535                           "4wire", &sc->has_4wire);
536        SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "4wire",
537            CTLFLAG_RW, &sc->has_4wire, 0, "has 4 wire SD Card bus");
538        if (sc->has_4wire)
539                sc->sc_cap |= CAP_HAS_4WIRE;
540
541        sc->allow_overclock = AT91_MCI_ALLOW_OVERCLOCK;
542        resource_int_value(device_get_name(dev), device_get_unit(dev),
543                           "allow_overclock", &sc->allow_overclock);
544        SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "allow_overclock",
545            CTLFLAG_RW, &sc->allow_overclock, 0,
546            "Allow up to 30MHz clock for 25MHz request when next highest speed 15MHz or less.");
547
548        SYSCTL_ADD_UINT(sctx, SYSCTL_CHILDREN(soid), OID_AUTO, "debug",
549            CTLFLAG_RWTUN, &mci_debug, 0, "enable debug output");
550
551        /*
552         * Our real min freq is master_clock/512, but upper driver layers are
553         * going to set the min speed during card discovery, and the right speed
554         * for that is 400kHz, so advertise a safe value just under that.
555         *
556         * For max speed, while the rm9200 manual says the max is 50mhz, it also
557         * says it supports only the SD v1.0 spec, which means the real limit is
558         * 25mhz. On the other hand, historical use has been to slightly violate
559         * the standard by running the bus at 30MHz.  For more information on
560         * that, see the comments at the top of this file.
561         */
562        sc->host.f_min = 375000;
563        sc->host.f_max = at91_master_clock / 2;
564        if (sc->host.f_max > 25000000)
565                sc->host.f_max = 25000000;
566        sc->host.host_ocr = MMC_OCR_320_330 | MMC_OCR_330_340;
567        sc->host.caps = 0;
568        if (sc->sc_cap & CAP_HAS_4WIRE)
569                sc->host.caps |= MMC_CAP_4_BIT_DATA;
570
571        child = device_add_child(dev, "mmc", 0);
572        device_set_ivars(dev, &sc->host);
573        err = bus_generic_attach(dev);
574out:
575        if (err)
576                at91_mci_deactivate(dev);
577        return (err);
578}
579
/*
 * Newbus detach method: quiesce the controller, release bus resources,
 * and free the two bounce buffers and their DMA tag.
 *
 * NOTE(review): returns EBUSY unconditionally (upstream XXX), so the
 * framework treats the detach as failed even though the resources above
 * have already been freed.
 */
static int
at91_mci_detach(device_t dev)
{
	struct at91_mci_softc *sc = device_get_softc(dev);

	at91_mci_fini(dev);
	at91_mci_deactivate(dev);

	bus_dmamem_free(sc->dmatag, sc->bbuf_vaddr[0], sc->bbuf_map[0]);
	bus_dmamem_free(sc->dmatag, sc->bbuf_vaddr[1], sc->bbuf_map[1]);
	bus_dma_tag_destroy(sc->dmatag);

	return (EBUSY); /* XXX */
}
594
595static int
596at91_mci_activate(device_t dev)
597{
598        struct at91_mci_softc *sc;
599        int rid;
600
601        sc = device_get_softc(dev);
602        rid = 0;
603        sc->mem_res = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
604            RF_ACTIVE);
605        if (sc->mem_res == NULL)
606                goto errout;
607
608        rid = 0;
609        sc->irq_res = bus_alloc_resource_any(dev, SYS_RES_IRQ, &rid,
610            RF_ACTIVE);
611        if (sc->irq_res == NULL)
612                goto errout;
613
614        return (0);
615errout:
616        at91_mci_deactivate(dev);
617        return (ENOMEM);
618}
619
/*
 * Undo at91_mci_activate() and partial attach work: tear down the
 * interrupt handler, detach children, release the memory and IRQ
 * resources, and on RTEMS free any XDMA channels that were allocated.
 * Each step is guarded, so this is safe to call from any failure point.
 */
static void
at91_mci_deactivate(device_t dev)
{
	struct at91_mci_softc *sc;

	sc = device_get_softc(dev);
	if (sc->intrhand)
		bus_teardown_intr(dev, sc->irq_res, sc->intrhand);
	sc->intrhand = NULL;
	bus_generic_detach(sc->dev);
	if (sc->mem_res)
		bus_release_resource(dev, SYS_RES_MEMORY,
		    rman_get_rid(sc->mem_res), sc->mem_res);
	sc->mem_res = NULL;
	if (sc->irq_res)
		bus_release_resource(dev, SYS_RES_IRQ,
		    rman_get_rid(sc->irq_res), sc->irq_res);
	sc->irq_res = NULL;
#ifdef __rtems__
	/* XDMAD_ALLOC_FAILED doubles as the "never allocated" marker,
	 * set at the top of at91_mci_attach(). */
	if (sc->xdma_rx_channel != XDMAD_ALLOC_FAILED) {
		XDMAD_FreeChannel(pXdmad, sc->xdma_rx_channel);
	}
	if (sc->xdma_tx_channel != XDMAD_ALLOC_FAILED) {
		XDMAD_FreeChannel(pXdmad, sc->xdma_tx_channel);
	}
#endif /* __rtems__ */
	return;
}
648
/*
 * Return non-zero when the SoC carries an MCI 1 rev 2.x controller,
 * which needs the CAP_MCI1_REV2XX workarounds.
 */
static int
at91_mci_is_mci1rev2xx(void)
{

#ifndef __rtems__
	switch (soc_info.type) {
	case AT91_T_SAM9260:
	case AT91_T_SAM9263:
	case AT91_T_CAP9:
	case AT91_T_SAM9G10:
	case AT91_T_SAM9G20:
	case AT91_T_SAM9RL:
		return(1);
	default:
		return (0);
	}
#else /* __rtems__ */
	/* Currently only supports the SAM V71 */
	return (1);
#endif /* __rtems__ */
}
670
/*
 * mmcbr update_ios method: apply the bus state in sc->host.ios.
 *
 * Programs the controller clock divisor so the SD clock does not exceed
 * ios->clock (modulo the deliberate overclock exception below) and
 * selects 1- or 4-bit bus width.  Always returns 0.
 */
static int
at91_mci_update_ios(device_t brdev, device_t reqdev)
{
	struct at91_mci_softc *sc;
	struct mmc_ios *ios;
	uint32_t clkdiv;
	uint32_t freq;

	sc = device_get_softc(brdev);
	ios = &sc->host.ios;

	/*
	 * Calculate our closest available clock speed that doesn't exceed the
	 * requested speed.
	 *
	 * When overclocking is allowed, the requested clock is 25MHz, the
	 * computed frequency is 15MHz or smaller and clockdiv is 1, use
	 * clockdiv of 0 to double that.  If less than 12.5MHz, double
	 * regardless of the overclocking setting.
	 *
	 * Whatever we come up with, store it back into ios->clock so that the
	 * upper layer drivers can report the actual speed of the bus.
	 */
	if (ios->clock == 0) {
		/* Bus clock off: just disable the controller. */
		WR4(sc, MCI_CR, MCI_CR_MCIDIS);
		clkdiv = 0;
	} else {
		WR4(sc, MCI_CR, MCI_CR_MCIEN|MCI_CR_PWSEN);
		/* SD clock = MCK / ((clkdiv + 1) * 2); round clkdiv up so
		 * the result never exceeds the requested frequency. */
		if ((at91_master_clock % (ios->clock * 2)) == 0)
			clkdiv = ((at91_master_clock / ios->clock) / 2) - 1;
		else
			clkdiv = (at91_master_clock / ios->clock) / 2;
		freq = at91_master_clock / ((clkdiv+1) * 2);
		if (clkdiv == 1 && ios->clock == 25000000 && freq <= 15000000) {
			if (sc->allow_overclock || freq <= 12500000) {
				clkdiv = 0;
				freq = at91_master_clock / ((clkdiv+1) * 2);
			}
		}
		ios->clock = freq;
	}
	if (ios->bus_width == bus_width_4)
		WR4(sc, MCI_SDCR, RD4(sc, MCI_SDCR) | MCI_SDCR_SDCBUS);
	else
		WR4(sc, MCI_SDCR, RD4(sc, MCI_SDCR) & ~MCI_SDCR_SDCBUS);
	/* Update only the CLKDIV field, preserving the other mode bits. */
	WR4(sc, MCI_MR, (RD4(sc, MCI_MR) & ~MCI_MR_CLKDIV) | clkdiv);
	/* Do we need a settle time here? */
	/* XXX We need to turn the device on/off here with a GPIO pin */
	return (0);
}
721
722#ifdef __rtems__
723static LinkedListDescriporView1 dma_desc[MAX_BLOCKS];
724
/*
 * Build and start a linked-list XDMA transfer of 'number_blocks' blocks
 * of 'block_size' bytes between the MCI data register and the buffer at
 * physical address 'paddr' ('len' bytes total).
 *
 * read == true : HSMCI RDR -> memory (RX channel/config)
 * read == false: memory -> HSMCI TDR (TX channel/config)
 *
 * One view-1 descriptor is generated per block; the last one disables
 * next-descriptor fetch so the list terminates.  Panics if the XDMAD
 * calls fail, since there is no meaningful recovery at this layer.
 */
static void
at91_mci_setup_xdma(struct at91_mci_softc *sc, bool read, uint32_t block_size,
    uint32_t number_blocks, bus_addr_t paddr, uint32_t len)
{
	sXdmadCfg *xdma_cfg;
	uint32_t xdma_channel;
	/* Fetch view-1 descriptors, updating src and dst from each one. */
	const uint32_t xdma_cndc = XDMAC_CNDC_NDVIEW_NDV1 |
	    XDMAC_CNDC_NDE_DSCR_FETCH_EN |
	    XDMAC_CNDC_NDSUP_SRC_PARAMS_UPDATED |
	    XDMAC_CNDC_NDDUP_DST_PARAMS_UPDATED;
	/* Bus addresses of the MCI receive/transmit data registers. */
	const uint32_t sa_rdr = (uint32_t)(sc->mem_res->r_bushandle + MCI_RDR);
	const uint32_t da_tdr = (uint32_t)(sc->mem_res->r_bushandle + MCI_TDR);
	const uint32_t xdma_interrupt = XDMAC_CIE_BIE | XDMAC_CIE_DIE |
	    XDMAC_CIE_FIE | XDMAC_CIE_RBIE | XDMAC_CIE_WBIE | XDMAC_CIE_ROIE;
	eXdmadRC rc;
	size_t i;

	/* Pick the channel/config pair prepared in at91_mci_attach(). */
	if (read) {
		xdma_cfg = &sc->xdma_rx_cfg;
		xdma_channel = sc->xdma_rx_channel;
	} else {
		xdma_cfg = &sc->xdma_tx_cfg;
		xdma_channel = sc->xdma_tx_channel;
	}

	/* One descriptor per block; mbr_ubc counts 32-bit words. */
	for (i = 0; i < number_blocks; ++i) {
		if (read) {
			dma_desc[i].mbr_sa = sa_rdr;
			dma_desc[i].mbr_da = ((uint32_t)paddr) + i * block_size;
		} else {
			dma_desc[i].mbr_sa = ((uint32_t)paddr) + i * block_size;
			dma_desc[i].mbr_da = da_tdr;
		}
		dma_desc[i].mbr_ubc = XDMA_UBC_NVIEW_NDV1 |
		    XDMA_UBC_NDEN_UPDATED | (block_size/4);
		if (i == number_blocks - 1) {
			/* Last block: stop descriptor fetching here. */
			dma_desc[i].mbr_ubc |= XDMA_UBC_NDE_FETCH_DIS;
			dma_desc[i].mbr_nda = 0;
		} else {
			dma_desc[i].mbr_ubc |= XDMA_UBC_NDE_FETCH_EN;
			dma_desc[i].mbr_nda = (uint32_t) &dma_desc[i+1];
		}
	}

	rc = XDMAD_ConfigureTransfer(pXdmad, xdma_channel, xdma_cfg, xdma_cndc,
	    (uint32_t)dma_desc, xdma_interrupt);
	if (rc != XDMAD_OK)
		panic("Could not configure XDMA: %d.", rc);

	/* FIXME: Is that correct? */
	if (read) {
		rtems_cache_invalidate_multiple_data_lines(paddr, len);
	} else {
		rtems_cache_flush_multiple_data_lines(paddr, len);
	}
	/* The XDMAC fetches the descriptors from memory itself, so they
	 * must be written back from the cache before the transfer starts. */
	rtems_cache_flush_multiple_data_lines(dma_desc, sizeof(dma_desc));

	rc = XDMAD_StartTransfer(pXdmad, xdma_channel);
	if (rc != XDMAD_OK)
		panic("Could not start XDMA: %d.", rc);

}
787
788#endif /* __rtems__ */
/*
 * Start execution of a single mmc command, including any data phase.
 *
 * Builds the MCI command register value from cmd->opcode and the response
 * flags, then either (no data) enables only the command-completion
 * interrupts and fires the command, or (data) programs the block registers
 * and DMA engine (PDC on stock FreeBSD, XDMA on RTEMS) before issuing the
 * command.  Completion continues in at91_mci_intr()/at91_mci_cmdrdy().
 */
static void
at91_mci_start_cmd(struct at91_mci_softc *sc, struct mmc_command *cmd)
{
        uint32_t cmdr, mr;
#ifdef __rtems__
        uint32_t number_blocks;
        uint32_t block_size;
#endif /* __rtems__ */
        struct mmc_data *data;

        sc->curcmd = cmd;
        data = cmd->data;

        /* XXX Upper layers don't always set this */
        cmd->mrq = sc->req;

        /* Begin setting up command register. */

        cmdr = cmd->opcode;

        if (sc->host.ios.bus_mode == opendrain)
                cmdr |= MCI_CMDR_OPDCMD;

        /* Set up response handling.  Allow max timeout for responses. */

        if (MMC_RSP(cmd->flags) == MMC_RSP_NONE)
                cmdr |= MCI_CMDR_RSPTYP_NO;
        else {
                cmdr |= MCI_CMDR_MAXLAT;
                if (cmd->flags & MMC_RSP_136)
                        cmdr |= MCI_CMDR_RSPTYP_136;
                else
                        cmdr |= MCI_CMDR_RSPTYP_48;
        }

        /*
         * If there is no data transfer, just set up the right interrupt mask
         * and start the command.
         *
         * The interrupt mask needs to be CMDRDY plus all non-data-transfer
         * errors. It's important to leave the transfer-related errors out, to
         * avoid spurious timeout or crc errors on a STOP command following a
         * multiblock read.  When a multiblock read is in progress, sending a
         * STOP in the middle of a block occasionally triggers such errors, but
         * we're totally disinterested in them because we've already gotten all
         * the data we wanted without error before sending the STOP command.
         */

        if (data == NULL) {
                uint32_t ier = MCI_SR_CMDRDY |
                    MCI_SR_RTOE | MCI_SR_RENDE |
                    MCI_SR_RCRCE | MCI_SR_RDIRE | MCI_SR_RINDE;

                at91_mci_pdc_disable(sc);

                if (cmd->opcode == MMC_STOP_TRANSMISSION)
                        cmdr |= MCI_CMDR_TRCMD_STOP;

                /* Ignore response CRC on CMD2 and ACMD41, per standard. */

                if (cmd->opcode == MMC_SEND_OP_COND ||
                    cmd->opcode == ACMD_SD_SEND_OP_COND)
                        ier &= ~MCI_SR_RCRCE;

                if (mci_debug)
                        printf("CMDR %x (opcode %d) ARGR %x no data\n",
                            cmdr, cmd->opcode, cmd->arg);

                /* Argument must be written before the command register. */
                WR4(sc, MCI_ARGR, cmd->arg);
                WR4(sc, MCI_CMDR, cmdr);
                WR4(sc, MCI_IDR, 0xffffffff);
                WR4(sc, MCI_IER, ier);
                return;
        }

        /* There is data, set up the transfer-related parts of the command. */

        if (data->flags & MMC_DATA_READ)
                cmdr |= MCI_CMDR_TRDIR;

        if (data->flags & (MMC_DATA_READ | MMC_DATA_WRITE))
                cmdr |= MCI_CMDR_TRCMD_START;

        if (data->flags & MMC_DATA_STREAM)
                cmdr |= MCI_CMDR_TRTYP_STREAM;
        else if (data->flags & MMC_DATA_MULTI) {
                cmdr |= MCI_CMDR_TRTYP_MULTIPLE;
                /* Remember direction so the STOP handling can apply the
                 * right hardware-bug workaround later. */
                sc->flags |= (data->flags & MMC_DATA_READ) ?
                    CMD_MULTIREAD : CMD_MULTIWRITE;
        }

        /*
         * Disable PDC until we're ready.
         *
         * Set block size and turn on PDC mode for dma xfer.
         * Note that the block size is the smaller of the amount of data to be
         * transferred, or 512 bytes.  The 512 size is fixed by the standard;
         * smaller blocks are possible, but never larger.
         */

#ifndef __rtems__
        WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS | PDC_PTCR_TXTDIS);

        mr = RD4(sc,MCI_MR) & ~MCI_MR_BLKLEN;
        mr |=  min(data->len, 512) << 16;
        WR4(sc, MCI_MR, mr | MCI_MR_PDCMODE|MCI_MR_PDCPADV);
#else /* __rtems__ */
        /* On RTEMS the XDMA engine is used instead of the PDC; block count
         * and size go into MCI_BLKR rather than the mode register. */
        mr = RD4(sc,MCI_MR);
        WR4(sc, MCI_MR, mr | MCI_MR_PDCPADV);

        WR4(sc, MCI_DMA, MCI_DMA_DMAEN | MCI_DMA_CHKSIZE_1);

        block_size = min(data->len, 512);
        number_blocks = data->len / block_size;
        WR4(sc, MCI_BLKR, block_size << 16 | number_blocks);
#endif /* __rtems__ */

        /*
         * Set up DMA.
         *
         * Use bounce buffers even if we don't need to byteswap, because doing
         * multi-block IO with large DMA buffers is way fast (compared to
         * single-block IO), even after incurring the overhead of also copying
         * from/to the caller's buffers (which may be in non-contiguous physical
         * pages).
         *
         * In an ideal non-byteswap world we could create a dma tag that allows
         * for discontiguous segments and do the IO directly from/to the
         * caller's buffer(s), using ENDRX/ENDTX interrupts to chain the
         * discontiguous buffers through the PDC. Someday.
         *
         * If a read is bigger than 2k, split it in half so that we can start
         * byte-swapping the first half while the second half is on the wire.
         * It would be best if we could split it into 8k chunks, but we can't
         * always keep up with the byte-swapping due to other system activity,
         * and if an RXBUFF interrupt happens while we're still handling the
         * byte-swap from the prior buffer (IE, we haven't returned from
         * handling the prior interrupt yet), then data will get dropped on the
         * floor and we can't easily recover from that.  The right fix for that
         * would be to have the interrupt handling only keep the DMA flowing and
         * enqueue filled buffers to be byte-swapped in a non-interrupt context.
         * Even that won't work on the write side of things though; in that
         * context we have to have all the data ready to go before starting the
         * dma.
         *
         * XXX what about stream transfers?
         */
        sc->xfer_offset = 0;
        sc->bbuf_curidx = 0;

        if (data->flags & (MMC_DATA_READ | MMC_DATA_WRITE)) {
                uint32_t len;
                uint32_t remaining = data->len;
                bus_addr_t paddr;
                int err;

#ifndef __rtems__
                if (remaining > (BBCOUNT*BBSIZE))
#else /* __rtems__ */
                if (remaining > (BBSIZE))
#endif /* __rtems__ */
                        panic("IO read size exceeds MAXDATA\n");

                if (data->flags & MMC_DATA_READ) {
#ifndef __rtems__
                        if (remaining > 2048) // XXX
                                len = remaining / 2;
                        else
#else
                        /* FIXME: This reduces performance. Set up DMA in two
                         * parts instead like done on AT91. */
#endif /* __rtems__ */
                                len = remaining;
                        err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[0],
                            sc->bbuf_vaddr[0], len, at91_mci_getaddr,
                            &paddr, BUS_DMA_NOWAIT);
                        if (err != 0)
                                panic("IO read dmamap_load failed\n");
                        bus_dmamap_sync(sc->dmatag, sc->bbuf_map[0],
                            BUS_DMASYNC_PREREAD);
#ifndef __rtems__
                        WR4(sc, PDC_RPR, paddr);
                        WR4(sc, PDC_RCR, len / 4);
                        sc->bbuf_len[0] = len;
                        remaining -= len;
                        if (remaining == 0) {
                                sc->bbuf_len[1] = 0;
                        } else {
                                len = remaining;
                                err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[1],
                                    sc->bbuf_vaddr[1], len, at91_mci_getaddr,
                                    &paddr, BUS_DMA_NOWAIT);
                                if (err != 0)
                                        panic("IO read dmamap_load failed\n");
                                bus_dmamap_sync(sc->dmatag, sc->bbuf_map[1],
                                    BUS_DMASYNC_PREREAD);
                                WR4(sc, PDC_RNPR, paddr);
                                WR4(sc, PDC_RNCR, len / 4);
                                sc->bbuf_len[1] = len;
                                remaining -= len;
                        }
                        WR4(sc, PDC_PTCR, PDC_PTCR_RXTEN);
#else /* __rtems__ */
                        /* RTEMS path always uses a single buffer, so the
                         * whole transfer must fit in bbuf[0]. */
                        at91_mci_setup_xdma(sc, true, block_size,
                            number_blocks, paddr, len);

                        sc->bbuf_len[0] = len;
                        remaining -= len;
                        sc->bbuf_len[1] = 0;
                        if (remaining != 0)
                                panic("Still rx-data left. This should never happen.");
#endif /* __rtems__ */
                } else {
                        /* Write: byte-swap into the bounce buffer before DMA. */
                        len = min(BBSIZE, remaining);
                        at91_bswap_buf(sc, sc->bbuf_vaddr[0], data->data, len);
                        err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[0],
                            sc->bbuf_vaddr[0], len, at91_mci_getaddr,
                            &paddr, BUS_DMA_NOWAIT);
                        if (err != 0)
                                panic("IO write dmamap_load failed\n");
                        bus_dmamap_sync(sc->dmatag, sc->bbuf_map[0],
                            BUS_DMASYNC_PREWRITE);
#ifndef __rtems__
                        /*
                         * Erratum workaround:  PDC transfer length on a write
                         * must not be smaller than 12 bytes (3 words); only
                         * blklen bytes (set above) are actually transferred.
                         */
                        WR4(sc, PDC_TPR,paddr);
                        WR4(sc, PDC_TCR, (len < 12) ? 3 : len / 4);
                        sc->bbuf_len[0] = len;
                        remaining -= len;
                        if (remaining == 0) {
                                sc->bbuf_len[1] = 0;
                        } else {
                                len = remaining;
                                at91_bswap_buf(sc, sc->bbuf_vaddr[1],
                                    ((char *)data->data)+BBSIZE, len);
                                err = bus_dmamap_load(sc->dmatag, sc->bbuf_map[1],
                                    sc->bbuf_vaddr[1], len, at91_mci_getaddr,
                                    &paddr, BUS_DMA_NOWAIT);
                                if (err != 0)
                                        panic("IO write dmamap_load failed\n");
                                bus_dmamap_sync(sc->dmatag, sc->bbuf_map[1],
                                    BUS_DMASYNC_PREWRITE);
                                WR4(sc, PDC_TNPR, paddr);
                                WR4(sc, PDC_TNCR, (len < 12) ? 3 : len / 4);
                                sc->bbuf_len[1] = len;
                                remaining -= len;
                        }
                        /* do not enable PDC xfer until CMDRDY asserted */
#else /* __rtems__ */
                        at91_mci_setup_xdma(sc, false, block_size,
                            number_blocks, paddr, len);

                        sc->bbuf_len[0] = len;
                        remaining -= len;
                        sc->bbuf_len[1] = 0;
                        if (remaining != 0)
                                panic("Still tx-data left. This should never happen.");

#endif /* __rtems__ */
                }
                data->xfer_len = 0; /* XXX what's this? appears to be unused. */
        }

        if (mci_debug)
                printf("CMDR %x (opcode %d) ARGR %x with data len %d\n",
                       cmdr, cmd->opcode, cmd->arg, cmd->data->len);

        WR4(sc, MCI_ARGR, cmd->arg);
        WR4(sc, MCI_CMDR, cmdr);
        WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_CMDRDY);
}
1063
1064static void
1065at91_mci_next_operation(struct at91_mci_softc *sc)
1066{
1067        struct mmc_request *req;
1068
1069        req = sc->req;
1070        if (req == NULL)
1071                return;
1072
1073        if (sc->flags & PENDING_CMD) {
1074                sc->flags &= ~PENDING_CMD;
1075                at91_mci_start_cmd(sc, req->cmd);
1076                return;
1077        } else if (sc->flags & PENDING_STOP) {
1078                sc->flags &= ~PENDING_STOP;
1079                at91_mci_start_cmd(sc, req->stop);
1080                return;
1081        }
1082
1083        WR4(sc, MCI_IDR, 0xffffffff);
1084        sc->req = NULL;
1085        sc->curcmd = NULL;
1086        //printf("req done\n");
1087        req->done(req);
1088}
1089
1090static int
1091at91_mci_request(device_t brdev, device_t reqdev, struct mmc_request *req)
1092{
1093        struct at91_mci_softc *sc = device_get_softc(brdev);
1094
1095        AT91_MCI_LOCK(sc);
1096        if (sc->req != NULL) {
1097                AT91_MCI_UNLOCK(sc);
1098                return (EBUSY);
1099        }
1100        //printf("new req\n");
1101        sc->req = req;
1102        sc->flags = PENDING_CMD;
1103        if (sc->req->stop)
1104                sc->flags |= PENDING_STOP;
1105        at91_mci_next_operation(sc);
1106        AT91_MCI_UNLOCK(sc);
1107        return (0);
1108}
1109
/*
 * mmcbr get_ro entry point.  Write-protect detection is not implemented for
 * this controller, so the media is always reported as writable.
 */
static int
at91_mci_get_ro(device_t brdev, device_t reqdev)
{
        return (0);
}
1115
1116static int
1117at91_mci_acquire_host(device_t brdev, device_t reqdev)
1118{
1119        struct at91_mci_softc *sc = device_get_softc(brdev);
1120        int err = 0;
1121
1122        AT91_MCI_LOCK(sc);
1123        while (sc->bus_busy)
1124                msleep(sc, &sc->sc_mtx, PZERO, "mciah", hz / 5);
1125        sc->bus_busy++;
1126        AT91_MCI_UNLOCK(sc);
1127        return (err);
1128}
1129
1130static int
1131at91_mci_release_host(device_t brdev, device_t reqdev)
1132{
1133        struct at91_mci_softc *sc = device_get_softc(brdev);
1134
1135        AT91_MCI_LOCK(sc);
1136        sc->bus_busy--;
1137        wakeup(sc);
1138        AT91_MCI_UNLOCK(sc);
1139        return (0);
1140}
1141
/*
 * Handle completion of one DMA buffer of a read transfer: byte-swap the
 * finished bounce buffer into the caller's buffer and either finish the
 * command or re-arm for the next buffer.
 */
static void
at91_mci_read_done(struct at91_mci_softc *sc, uint32_t sr)
{
        struct mmc_command *cmd = sc->curcmd;
        char * dataptr = (char *)cmd->data->data;
        uint32_t curidx = sc->bbuf_curidx;
        uint32_t len = sc->bbuf_len[curidx];

        /*
         * We arrive here when a DMA transfer for a read is done, whether it's
         * a single or multi-block read.
         *
         * We byte-swap the buffer that just completed, and if that is the
         * last buffer that's part of this read then we move on to the next
         * operation, otherwise we wait for another ENDRX for the next buffer.
         */

#ifndef __rtems__
        /* Hand the completed buffer back from the DMA engine to the CPU. */
        bus_dmamap_sync(sc->dmatag, sc->bbuf_map[curidx], BUS_DMASYNC_POSTREAD);
        bus_dmamap_unload(sc->dmatag, sc->bbuf_map[curidx]);
#endif /* __rtems__ */

        at91_bswap_buf(sc, dataptr + sc->xfer_offset, sc->bbuf_vaddr[curidx], len);

        if (mci_debug) {
                printf("read done sr %x curidx %d len %d xfer_offset %d\n",
                       sr, curidx, len, sc->xfer_offset);
        }

        sc->xfer_offset += len;
        sc->bbuf_curidx = !curidx; /* swap buffers */

        /*
         * If we've transferred all the data, move on to the next operation.
         *
         * If we're still transferring the last buffer, RNCR is already zero but
         * we have to write a zero anyway to clear the ENDRX status so we don't
         * re-interrupt until the last buffer is done.
         */
        if (sc->xfer_offset == cmd->data->len) {
                WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS | PDC_PTCR_TXTDIS);
                cmd->error = MMC_ERR_NONE;
                at91_mci_next_operation(sc);
        } else {
                WR4(sc, PDC_RNCR, 0);
#ifndef __rtems__
                WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_ENDRX);
#else /* __rtems__ */
                /* RTEMS/XDMA build signals buffer completion via XFRDONE
                 * instead of the PDC's ENDRX. */
                WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_XFRDONE);
#endif /* __rtems__ */
        }
}
1194
/*
 * Handle completion of the DMA portion of a write transfer: release the
 * bounce buffer and either complete the command now or wait for NOTBUSY.
 */
static void
at91_mci_write_done(struct at91_mci_softc *sc, uint32_t sr)
{
        struct mmc_command *cmd = sc->curcmd;

        /*
         * We arrive here when the entire DMA transfer for a write is done,
         * whether it's a single or multi-block write.  If it's multi-block we
         * have to immediately move on to the next operation which is to send
         * the stop command.  If it's a single-block transfer we need to wait
         * for NOTBUSY, but if that's already asserted we can avoid another
         * interrupt and just move on to completing the request right away.
         */

        WR4(sc, PDC_PTCR, PDC_PTCR_RXTDIS | PDC_PTCR_TXTDIS);

        /* Give the buffer back to the CPU and release the DMA mapping. */
        bus_dmamap_sync(sc->dmatag, sc->bbuf_map[sc->bbuf_curidx],
            BUS_DMASYNC_POSTWRITE);
        bus_dmamap_unload(sc->dmatag, sc->bbuf_map[sc->bbuf_curidx]);

        if ((cmd->data->flags & MMC_DATA_MULTI) || (sr & MCI_SR_NOTBUSY)) {
                cmd->error = MMC_ERR_NONE;
                at91_mci_next_operation(sc);
        } else {
                WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_NOTBUSY);
        }
}
1222
1223static void
1224at91_mci_notbusy(struct at91_mci_softc *sc)
1225{
1226        struct mmc_command *cmd = sc->curcmd;
1227
1228        /*
1229         * We arrive here by either completion of a single-block write, or
1230         * completion of the stop command that ended a multi-block write (and,
1231         * I suppose, after a card-select or erase, but I haven't tested
1232         * those).  Anyway, we're done and it's time to move on to the next
1233         * command.
1234         */
1235
1236        cmd->error = MMC_ERR_NONE;
1237        at91_mci_next_operation(sc);
1238}
1239
1240static void
1241at91_mci_stop_done(struct at91_mci_softc *sc, uint32_t sr)
1242{
1243        struct mmc_command *cmd = sc->curcmd;
1244
1245        /*
1246         * We arrive here after receiving CMDRDY for a MMC_STOP_TRANSMISSION
1247         * command.  Depending on the operation being stopped, we may have to
1248         * do some unusual things to work around hardware bugs.
1249         */
1250
1251        /*
1252         * This is known to be true of at91rm9200 hardware; it may or may not
1253         * apply to more recent chips:
1254         *
1255         * After stopping a multi-block write, the NOTBUSY bit in MCI_SR does
1256         * not properly reflect the actual busy state of the card as signaled
1257         * on the DAT0 line; it always claims the card is not-busy.  If we
1258         * believe that and let operations continue, following commands will
1259         * fail with response timeouts (except of course MMC_SEND_STATUS -- it
1260         * indicates the card is busy in the PRG state, which was the smoking
1261         * gun that showed MCI_SR NOTBUSY was not tracking DAT0 correctly).
1262         *
1263         * The atmel docs are emphatic: "This flag [NOTBUSY] must be used only
1264         * for Write Operations."  I guess technically since we sent a stop
1265         * it's not a write operation anymore.  But then just what did they
1266         * think it meant for the stop command to have "...an optional busy
1267         * signal transmitted on the data line" according to the SD spec?
1268         *
1269         * I tried a variety of things to un-wedge the MCI and get the status
1270         * register to reflect NOTBUSY correctly again, but the only thing
1271         * that worked was a full device reset.  It feels like an awfully big
1272         * hammer, but doing a full reset after every multiblock write is
1273         * still faster than doing single-block IO (by almost two orders of
1274         * magnitude: 20KB/sec improves to about 1.8MB/sec best case).
1275         *
1276         * After doing the reset, wait for a NOTBUSY interrupt before
1277         * continuing with the next operation.
1278         *
1279         * This workaround breaks multiwrite on the rev2xx parts, but some other
1280         * workaround is needed.
1281         */
1282        if ((sc->flags & CMD_MULTIWRITE) && (sc->sc_cap & CAP_NEEDS_BYTESWAP)) {
1283                at91_mci_reset(sc);
1284                WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_NOTBUSY);
1285                return;
1286        }
1287
1288        /*
1289         * This is known to be true of at91rm9200 hardware; it may or may not
1290         * apply to more recent chips:
1291         *
1292         * After stopping a multi-block read, loop to read and discard any
1293         * data that coasts in after we sent the stop command.  The docs don't
1294         * say anything about it, but empirical testing shows that 1-3
1295         * additional words of data get buffered up in some unmentioned
1296         * internal fifo and if we don't read and discard them here they end
1297         * up on the front of the next read DMA transfer we do.
1298         *
1299         * This appears to be unnecessary for rev2xx parts.
1300         */
1301        if ((sc->flags & CMD_MULTIREAD) && (sc->sc_cap & CAP_NEEDS_BYTESWAP)) {
1302                uint32_t sr;
1303                int count = 0;
1304
1305                do {
1306                        sr = RD4(sc, MCI_SR);
1307                        if (sr & MCI_SR_RXRDY) {
1308                                RD4(sc,  MCI_RDR);
1309                                ++count;
1310                        }
1311                } while (sr & MCI_SR_RXRDY);
1312                at91_mci_reset(sc);
1313        }
1314
1315        cmd->error = MMC_ERR_NONE;
1316        at91_mci_next_operation(sc);
1317
1318}
1319
/*
 * Handle a CMDRDY interrupt: runs at the end of every command.  Retrieves
 * the response words (if any) and decides whether the command is finished
 * or must wait on further events (NOTBUSY, data-transfer completion, or the
 * special post-STOP handling).
 */
static void
at91_mci_cmdrdy(struct at91_mci_softc *sc, uint32_t sr)
{
        struct mmc_command *cmd = sc->curcmd;
        int i;

        /* Guard against a spurious CMDRDY with no command in flight. */
        if (cmd == NULL)
                return;

        /*
         * We get here at the end of EVERY command.  We retrieve the command
         * response (if any) then decide what to do next based on the command.
         */

        if (cmd->flags & MMC_RSP_PRESENT) {
                /* 136-bit responses occupy four RSPR words, others just one. */
                for (i = 0; i < ((cmd->flags & MMC_RSP_136) ? 4 : 1); i++) {
                        cmd->resp[i] = RD4(sc, MCI_RSPR + i * 4);
                        if (mci_debug)
                                printf("RSPR[%d] = %x sr=%x\n", i, cmd->resp[i],  sr);
                }
        }

        /*
         * If this was a stop command, go handle the various special
         * conditions (read: bugs) that have to be dealt with following a stop.
         */
        if (cmd->opcode == MMC_STOP_TRANSMISSION) {
                at91_mci_stop_done(sc, sr);
                return;
        }

        /*
         * If this command can continue to assert BUSY beyond the response then
         * we need to wait for NOTBUSY before the command is really done.
         *
         * Note that this may not work properly on the at91rm9200.  It certainly
         * doesn't work for the STOP command that follows a multi-block write,
         * so post-stop CMDRDY is handled separately; see the special handling
         * in at91_mci_stop_done().
         *
         * Beside STOP, there are other R1B-type commands that use the busy
         * signal after CMDRDY: CMD7 (card select), CMD28-29 (write protect),
         * CMD38 (erase). I haven't tested any of them, but I rather expect
         * them all to have the same sort of problem with MCI_SR not actually
         * reflecting the state of the DAT0-line busy indicator.  So this code
         * may need to grow some sort of special handling for them too. (This
         * just in: CMD7 isn't a problem right now because dev/mmc.c incorrectly
         * sets the response flags to R1 rather than R1B.) XXX
         */
        if ((cmd->flags & MMC_RSP_BUSY)) {
                WR4(sc, MCI_IER, MCI_SR_ERROR | MCI_SR_NOTBUSY);
                return;
        }

        /*
         * If there is a data transfer with this command, then...
         * - If it's a read, we need to wait for ENDRX.
         * - If it's a write, now is the time to enable the PDC, and we need
         *   to wait for a BLKE that follows a TXBUFE, because if we're doing
         *   a split transfer we get a BLKE after the first half (when TPR/TCR
         *   get loaded from TNPR/TNCR).  So first we wait for the TXBUFE, and
         *   the handling for that interrupt will then invoke the wait for the
         *   subsequent BLKE which indicates actual completion.
         */
        if (cmd->data) {
                uint32_t ier;
#ifndef __rtems__
                if (cmd->data->flags & MMC_DATA_READ) {
                        ier = MCI_SR_ENDRX;
                } else {
                        ier = MCI_SR_TXBUFE;
                        WR4(sc, PDC_PTCR, PDC_PTCR_TXTEN);
                }
#else /* __rtems__ */
                /* RTEMS/XDMA build uses a single XFRDONE event for both
                 * directions instead of the PDC-specific interrupts. */
                ier = MCI_SR_XFRDONE;
#endif /* __rtems__ */
                WR4(sc, MCI_IER, MCI_SR_ERROR | ier);
                return;
        }

        /*
         * If we made it to here, we don't need to wait for anything more for
         * the current command, move on to the next command (will complete the
         * request if there is no next command).
         */
        cmd->error = MMC_ERR_NONE;
        at91_mci_next_operation(sc);
}
1408
1409static void
1410at91_mci_intr(void *arg)
1411{
1412        struct at91_mci_softc *sc = (struct at91_mci_softc*)arg;
1413        struct mmc_command *cmd = sc->curcmd;
1414        uint32_t sr, isr;
1415
1416        AT91_MCI_LOCK(sc);
1417
1418        sr = RD4(sc, MCI_SR);
1419        isr = sr & RD4(sc, MCI_IMR);
1420
1421        if (mci_debug)
1422                printf("i 0x%x sr 0x%x\n", isr, sr);
1423
1424        /*
1425         * All interrupts are one-shot; disable it now.
1426         * The next operation will re-enable whatever interrupts it wants.
1427         */
1428        WR4(sc, MCI_IDR, isr);
1429        if (isr & MCI_SR_ERROR) {
1430                if (isr & (MCI_SR_RTOE | MCI_SR_DTOE))
1431                        cmd->error = MMC_ERR_TIMEOUT;
1432                else if (isr & (MCI_SR_RCRCE | MCI_SR_DCRCE))
1433                        cmd->error = MMC_ERR_BADCRC;
1434                else if (isr & (MCI_SR_OVRE | MCI_SR_UNRE))
1435                        cmd->error = MMC_ERR_FIFO;
1436                else
1437                        cmd->error = MMC_ERR_FAILED;
1438                /*
1439                 * CMD8 is used to probe for SDHC cards, a standard SD card
1440                 * will get a response timeout; don't report it because it's a
1441                 * normal and expected condition.  One might argue that all
1442                 * error reporting should be left to higher levels, but when
1443                 * they report at all it's always EIO, which isn't very
1444                 * helpful. XXX bootverbose?
1445                 */
1446                if (cmd->opcode != 8) {
1447                        device_printf(sc->dev,
1448                            "IO error; status MCI_SR = 0x%b cmd opcode = %d%s\n",
1449                            sr, MCI_SR_BITSTRING, cmd->opcode,
1450                            (cmd->opcode != 12) ? "" :
1451                            (sc->flags & CMD_MULTIREAD) ? " after read" : " after write");
1452                        /* XXX not sure RTOE needs a full reset, just a retry */
1453                        at91_mci_reset(sc);
1454                }
1455                at91_mci_next_operation(sc);
1456        } else {
1457#ifndef __rtems__
1458                if (isr & MCI_SR_TXBUFE) {
1459//                      printf("TXBUFE\n");
1460                        /*
1461                         * We need to wait for a BLKE that follows TXBUFE
1462                         * (intermediate BLKEs might happen after ENDTXes if
1463                         * we're chaining multiple buffers).  If BLKE is also
1464                         * asserted at the time we get TXBUFE, we can avoid
1465                         * another interrupt and process it right away, below.
1466                         */
1467                        if (sr & MCI_SR_BLKE)
1468                                isr |= MCI_SR_BLKE;
1469                        else
1470                                WR4(sc, MCI_IER, MCI_SR_BLKE);
1471                }
1472                if (isr & MCI_SR_RXBUFF) {
1473//                      printf("RXBUFF\n");
1474                }
1475                if (isr & MCI_SR_ENDTX) {
1476//                      printf("ENDTX\n");
1477                }
1478                if (isr & MCI_SR_ENDRX) {
1479//                      printf("ENDRX\n");
1480                        at91_mci_read_done(sc, sr);
1481                }
1482#else /* __rtems__ */
1483                if (isr & MCI_SR_XFRDONE) {
1484                        struct mmc_command *cmd = sc->curcmd;
1485                        if (cmd->data->flags & MMC_DATA_READ) {
1486                                at91_mci_read_done(sc, sr);
1487                        } else {
1488                                if (sr & MCI_SR_BLKE)
1489                                        isr |= MCI_SR_BLKE;
1490                                else
1491                                        WR4(sc, MCI_IER, MCI_SR_BLKE);
1492                        }
1493                }
1494#endif /* __rtems__ */
1495                if (isr & MCI_SR_NOTBUSY) {
1496//                      printf("NOTBUSY\n");
1497                        at91_mci_notbusy(sc);
1498                }
1499                if (isr & MCI_SR_DTIP) {
1500//                      printf("Data transfer in progress\n");
1501                }
1502                if (isr & MCI_SR_BLKE) {
1503//                      printf("Block transfer end\n");
1504                        at91_mci_write_done(sc, sr);
1505                }
1506                if (isr & MCI_SR_TXRDY) {
1507//                      printf("Ready to transmit\n");
1508                }
1509                if (isr & MCI_SR_RXRDY) {
1510//                      printf("Ready to receive\n");
1511                }
1512                if (isr & MCI_SR_CMDRDY) {
1513//                      printf("Command ready\n");
1514                        at91_mci_cmdrdy(sc, sr);
1515                }
1516        }
1517        AT91_MCI_UNLOCK(sc);
1518}
1519
/*
 * mmcbr instance-variable read accessor: report host/bus configuration and
 * capabilities to the mmc bus layer.  Returns EINVAL for unknown ivars.
 */
static int
at91_mci_read_ivar(device_t bus, device_t child, int which, uintptr_t *result)
{
        struct at91_mci_softc *sc = device_get_softc(bus);

        switch (which) {
        default:
                return (EINVAL);
        case MMCBR_IVAR_BUS_MODE:
                *(int *)result = sc->host.ios.bus_mode;
                break;
        case MMCBR_IVAR_BUS_WIDTH:
                *(int *)result = sc->host.ios.bus_width;
                break;
        case MMCBR_IVAR_CHIP_SELECT:
                *(int *)result = sc->host.ios.chip_select;
                break;
        case MMCBR_IVAR_CLOCK:
                *(int *)result = sc->host.ios.clock;
                break;
        case MMCBR_IVAR_F_MIN:
                *(int *)result = sc->host.f_min;
                break;
        case MMCBR_IVAR_F_MAX:
                *(int *)result = sc->host.f_max;
                break;
        case MMCBR_IVAR_HOST_OCR:
                *(int *)result = sc->host.host_ocr;
                break;
        case MMCBR_IVAR_MODE:
                *(int *)result = sc->host.mode;
                break;
        case MMCBR_IVAR_OCR:
                *(int *)result = sc->host.ocr;
                break;
        case MMCBR_IVAR_POWER_MODE:
                *(int *)result = sc->host.ios.power_mode;
                break;
        case MMCBR_IVAR_VDD:
                *(int *)result = sc->host.ios.vdd;
                break;
        case MMCBR_IVAR_CAPS:
                /* Refresh the 4-wire capability from the current has_4wire
                 * setting each time caps are queried. */
                if (sc->has_4wire) {
                        sc->sc_cap |= CAP_HAS_4WIRE;
                        sc->host.caps |= MMC_CAP_4_BIT_DATA;
                } else {
                        sc->sc_cap &= ~CAP_HAS_4WIRE;
                        sc->host.caps &= ~MMC_CAP_4_BIT_DATA;
                }
                *(int *)result = sc->host.caps;
                break;
#ifdef __rtems__
        case MMCBR_IVAR_TIMING:
                *result = sc->host.ios.timing;
                break;
#endif /* __rtems__ */
        case MMCBR_IVAR_MAX_DATA:
                /*
                 * Something is wrong with the 2x parts and multiblock, so
                 * just do 1 block at a time for now, which really kills
                 * performance.
                 */
                if (sc->sc_cap & CAP_MCI1_REV2XX)
                        *(int *)result = 1;
                else
                        *(int *)result = MAX_BLOCKS;
                break;
        }
        return (0);
}
1590
1591static int
1592at91_mci_write_ivar(device_t bus, device_t child, int which, uintptr_t value)
1593{
1594        struct at91_mci_softc *sc = device_get_softc(bus);
1595
1596        switch (which) {
1597        default:
1598                return (EINVAL);
1599        case MMCBR_IVAR_BUS_MODE:
1600                sc->host.ios.bus_mode = value;
1601                break;
1602        case MMCBR_IVAR_BUS_WIDTH:
1603                sc->host.ios.bus_width = value;
1604                break;
1605        case MMCBR_IVAR_CHIP_SELECT:
1606                sc->host.ios.chip_select = value;
1607                break;
1608        case MMCBR_IVAR_CLOCK:
1609                sc->host.ios.clock = value;
1610                break;
1611        case MMCBR_IVAR_MODE:
1612                sc->host.mode = value;
1613                break;
1614        case MMCBR_IVAR_OCR:
1615                sc->host.ocr = value;
1616                break;
1617        case MMCBR_IVAR_POWER_MODE:
1618                sc->host.ios.power_mode = value;
1619                break;
1620        case MMCBR_IVAR_VDD:
1621                sc->host.ios.vdd = value;
1622                break;
1623#ifdef __rtems__
1624        case MMCBR_IVAR_TIMING:
1625                sc->host.ios.timing = value;
1626                break;
1627#endif /* __rtems__ */
1628        /* These are read-only */
1629        case MMCBR_IVAR_CAPS:
1630        case MMCBR_IVAR_HOST_OCR:
1631        case MMCBR_IVAR_F_MIN:
1632        case MMCBR_IVAR_F_MAX:
1633        case MMCBR_IVAR_MAX_DATA:
1634                return (EINVAL);
1635        }
1636        return (0);
1637}
1638
/*
 * kobj method table wiring this driver into the device, bus and mmc
 * bridge (mmcbr) interfaces.  DEVMETHOD_END must terminate the list.
 */
static device_method_t at91_mci_methods[] = {
	/* device_if */
	DEVMETHOD(device_probe, at91_mci_probe),
	DEVMETHOD(device_attach, at91_mci_attach),
	DEVMETHOD(device_detach, at91_mci_detach),

	/* Bus interface */
	DEVMETHOD(bus_read_ivar,	at91_mci_read_ivar),
	DEVMETHOD(bus_write_ivar,	at91_mci_write_ivar),

	/* mmcbr_if */
	DEVMETHOD(mmcbr_update_ios, at91_mci_update_ios),
	DEVMETHOD(mmcbr_request, at91_mci_request),
	DEVMETHOD(mmcbr_get_ro, at91_mci_get_ro),
	DEVMETHOD(mmcbr_acquire_host, at91_mci_acquire_host),
	DEVMETHOD(mmcbr_release_host, at91_mci_release_host),

	DEVMETHOD_END
};
1658
/* Driver description: name, method table, and per-instance softc size. */
static driver_t at91_mci_driver = {
	"at91_mci",
	at91_mci_methods,
	sizeof(struct at91_mci_softc),
};

/* Devclass shared by all at91_mci instances. */
static devclass_t at91_mci_devclass;
1666
/*
 * Register the driver with the appropriate parent bus: simplebus (FDT) or
 * atmelarm on stock FreeBSD, nexus on RTEMS.  An mmc bus instance is then
 * attached below the controller.
 *
 * NOTE(review): the trailing #endif closes an #if opened earlier in the
 * file (per its comment, guarding __rtems__ && LIBBSP_ARM_ATSAM_BSP_H) —
 * confirm against the full file when modifying this section.
 */
#ifndef __rtems__
#ifdef FDT
DRIVER_MODULE(at91_mci, simplebus, at91_mci_driver, at91_mci_devclass, NULL,
    NULL);
#else
DRIVER_MODULE(at91_mci, atmelarm, at91_mci_driver, at91_mci_devclass, NULL,
    NULL);
#endif

MMC_DECLARE_BRIDGE(at91_mci);
#else /* __rtems__ */
DRIVER_MODULE(at91_mci, nexus, at91_mci_driver, at91_mci_devclass, NULL, NULL);
#endif /* __rtems__ */
DRIVER_MODULE(mmc, at91_mci, mmc_driver, mmc_devclass, NULL, NULL);
MODULE_DEPEND(at91_mci, mmc, 1, 1, 1);
#endif /* __rtems__ && LIBBSP_ARM_ATSAM_BSP_H */
/* Note: See TracBrowser for help on using the repository browser. */