linux-3.0.x for AP-SH4A-0A Board
修訂 | f392cbf75615e9d8cb90ef5ffb8d4e752b7be3c7 (tree) |
---|---|
時間 | 2011-08-31 04:55:01 |
作者 | Ming Lei <ming.lei@cano...> |
Committer | Nicolas Pitre |
usb: ehci: make HC see up-to-date qh/qtd descriptor ASAP
This patch introduces the helper ehci_sync_mem to flush
qtd/qh into memory immediately on some ARM platforms, so that
the HC can see the up-to-date qtd/qh descriptor ASAP.
This patch fixes a performance bug on the ARM Cortex A9 dual core
platform, which has been reported on quite a few ARM machines
(OMAP4, Tegra 2, snowball...); see details at
https://bugs.launchpad.net/bugs/709245.
The patch has been tested ok on OMAP4 panda A1 board, and the
performance of 'dd' over usb mass storage can be increased from
4~5MB/sec to 14~16MB/sec after applying this patch.
Cc: Russell King <linux@arm.linux.org.uk>
Signed-off-by: Ming Lei <ming.lei@canonical.com>
Signed-off-by: Alan Stern <stern@rowland.harvard.edu>
@@ -995,6 +995,12 @@ static void qh_link_async (struct ehci_hcd *ehci, struct ehci_qh *qh) | ||
995 | 995 | head->qh_next.qh = qh; |
996 | 996 | head->hw->hw_next = dma; |
997 | 997 | |
998 | + /* | |
999 | + * flush qh descriptor into memory immediately, | |
1000 | + * see comments in qh_append_tds. | |
1001 | + */ | |
1002 | + ehci_sync_mem(); | |
1003 | + | |
998 | 1004 | qh_get(qh); |
999 | 1005 | qh->xacterrs = 0; |
1000 | 1006 | qh->qh_state = QH_STATE_LINKED; |
@@ -1082,6 +1088,18 @@ static struct ehci_qh *qh_append_tds ( | ||
1082 | 1088 | wmb (); |
1083 | 1089 | dummy->hw_token = token; |
1084 | 1090 | |
1091 | + /* | |
1092 | + * Writing to dma coherent buffer on ARM may | |
1093 | + * be delayed to reach memory, so HC may not see | |
1094 | + * hw_token of dummy qtd in time, which can cause | |
1095 | + * the qtd transaction to be executed very late, | |
1096 | + * and degrade performance a lot. ehci_sync_mem | |
1097 | + * is added to flush 'token' immediately into | |
1098 | + * memory, so that ehci can execute the transaction | |
1099 | + * ASAP. | |
1100 | + */ | |
1101 | + ehci_sync_mem(); | |
1102 | + | |
1085 | 1103 | urb->hcpriv = qh_get (qh); |
1086 | 1104 | } |
1087 | 1105 | } |
@@ -736,6 +736,23 @@ static inline u32 hc32_to_cpup (const struct ehci_hcd *ehci, const __hc32 *x) | ||
736 | 736 | |
737 | 737 | #endif |
738 | 738 | |
739 | +/* | |
740 | + * Writing to dma coherent memory on ARM may be delayed via L2 | |
741 | + * writing buffer, so introduce the helper which can flush L2 writing | |
742 | + * buffer into memory immediately, especially used to flush ehci | |
743 | + * descriptor to memory. | |
744 | + */ | |
745 | +#ifdef CONFIG_ARM_DMA_MEM_BUFFERABLE | |
746 | +static inline void ehci_sync_mem() | |
747 | +{ | |
748 | + mb(); | |
749 | +} | |
750 | +#else | |
751 | +static inline void ehci_sync_mem() | |
752 | +{ | |
753 | +} | |
754 | +#endif | |
755 | + | |
739 | 756 | /*-------------------------------------------------------------------------*/ |
740 | 757 | |
741 | 758 | #ifndef DEBUG |