From 2d979ee4a9e7de6b956c373842e8c81c2e6382b4 Mon Sep 17 00:00:00 2001 From: R Date: Fri, 12 Sep 2025 22:16:15 +0100 Subject: [PATCH 1/5] ohci: Add functions used for explicit cache operations --- src/portable/ohci/ohci.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/portable/ohci/ohci.c b/src/portable/ohci/ohci.c index 81091c9a7e..9450b8d0e2 100644 --- a/src/portable/ohci/ohci.c +++ b/src/portable/ohci/ohci.c @@ -143,6 +143,15 @@ enum { PID_FROM_TD = 0, }; +//--------------------------------------------------------------------+ +// Support for explicit D-cache operations +//--------------------------------------------------------------------+ +TU_ATTR_WEAK bool hcd_dcache_clean(void const* addr, uint32_t data_size) { (void) addr; (void) data_size; return true; } +TU_ATTR_WEAK bool hcd_dcache_invalidate(void const* addr, uint32_t data_size) { (void) addr; (void) data_size; return true; } +#ifndef hcd_dcache_uncached +#define hcd_dcache_uncached(x) (x) +#endif + //--------------------------------------------------------------------+ // INTERNAL OBJECT & FUNCTION DECLARATION //--------------------------------------------------------------------+ From 839149c5c0b667a8080de39c85cdbdd291be4dc5 Mon Sep 17 00:00:00 2001 From: R Date: Fri, 12 Sep 2025 22:16:46 +0100 Subject: [PATCH 2/5] ohci: Align TDs to cache lines --- src/portable/ohci/ohci.h | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) diff --git a/src/portable/ohci/ohci.h b/src/portable/ohci/ohci.h index 94bad5df7f..12c411764c 100644 --- a/src/portable/ohci/ohci.h +++ b/src/portable/ohci/ohci.h @@ -61,6 +61,31 @@ typedef struct { TU_VERIFY_STATIC( sizeof(ohci_hcca_t) == 256, "size is not correct" ); +// An OHCI host controller is controlled using data structures placed in memory (RAM). 
+// It needs to both read and write these data structures (as defined by the OHCI specification), +// and this can be mentally conceptualized similar to two software threads running on +// two different CPUs. In order to prevent a _data race_ where data gets corrupted, +// the CPU and the OHCI host controller need to agree on how the memory should be accessed. +// In this driver, we do this by transferring logical ownership of transfer descriptors (TDs) +// between the CPU and the OHCI host controller. Only the device which holds the logical ownership +// is allowed to read or write the TD. This ownership is not visible anywhere in the code, +// but it instead must be inferred based on the logical state of the transfer. +// +// If dcache-supporting mode is enabled, we need to do additional manual cache operations +// in order to correctly transfer this logical ownership and prevent data corruption. +// In order to do this, we also choose to align each OHCI TD so that it doesn't +// share CPU cache lines with other TDs. This is because manual cache operations +// can only be performed on cache line granularity. In other words, one cache line is +// the _smallest_ amount that can be read/written at a time. If there were to be multiple TDs +// in the same cache line, they would be required to always have the same logical ownership. +// This ends up being impossible to guarantee, so we choose a design which avoids the situation entirely. +// +// TDs have a minimum alignment requirement according to the OHCI specification. This is 16 bytes for +// a general TD but 32 bytes for an isochronous TD. It happens that typical CPU cache line sizes are usually +// a power of 2 at least 32. In order to simplify code later in this file, we assume this +// as an additional requirement. +TU_VERIFY_STATIC( (CFG_TUH_MEM_DCACHE_ENABLE ? 
CFG_TUH_MEM_DCACHE_LINE_SIZE : 0) % 32 == 0, "cache line not multiple of 32" ); + // common link item for gtd and itd for list travel // use as pointer only typedef struct TU_ATTR_ALIGNED(16) { @@ -69,7 +94,7 @@ typedef struct TU_ATTR_ALIGNED(16) { uint32_t reserved2; }ohci_td_item_t; -typedef struct TU_ATTR_ALIGNED(16) +typedef struct TU_ATTR_ALIGNED(CFG_TUH_MEM_DCACHE_ENABLE ? CFG_TUH_MEM_DCACHE_LINE_SIZE : 16) { // Word 0 uint32_t used : 1; @@ -92,7 +117,7 @@ typedef struct TU_ATTR_ALIGNED(16) uint8_t* buffer_end; } ohci_gtd_t; -TU_VERIFY_STATIC( sizeof(ohci_gtd_t) == 16, "size is not correct" ); +TU_VERIFY_STATIC( sizeof(ohci_gtd_t) == (CFG_TUH_MEM_DCACHE_ENABLE ? CFG_TUH_MEM_DCACHE_LINE_SIZE : 16), "size is not correct" ); typedef struct TU_ATTR_ALIGNED(16) { @@ -129,7 +154,7 @@ typedef struct TU_ATTR_ALIGNED(16) TU_VERIFY_STATIC( sizeof(ohci_ed_t) == 16, "size is not correct" ); -typedef struct TU_ATTR_ALIGNED(32) +typedef struct TU_ATTR_ALIGNED(CFG_TUH_MEM_DCACHE_ENABLE ? CFG_TUH_MEM_DCACHE_LINE_SIZE : 32) { /*---------- Word 1 ----------*/ uint32_t starting_frame : 16; @@ -152,7 +177,7 @@ typedef struct TU_ATTR_ALIGNED(32) volatile uint16_t offset_packetstatus[8]; } ochi_itd_t; -TU_VERIFY_STATIC( sizeof(ochi_itd_t) == 32, "size is not correct" ); +TU_VERIFY_STATIC( sizeof(ochi_itd_t) == (CFG_TUH_MEM_DCACHE_ENABLE ? CFG_TUH_MEM_DCACHE_LINE_SIZE : 32), "size is not correct" ); typedef struct { uint16_t expected_bytes; // up to 8192 bytes so max is 13 bits From 330d9d7f426b3227149e76fc7dd167e2cd3745ad Mon Sep 17 00:00:00 2001 From: R Date: Fri, 12 Sep 2025 23:33:47 +0100 Subject: [PATCH 3/5] ohci: Re-implement TD allocation, matching the specification The initial motivation for doing this is to remove the `used` flag in the TD. If we use this flag, we end up being required to read from TDs that the OHCI controller might be modifying (i.e. the OHCI controller logically owns the TD). 
This happens when we try to allocate a new, empty TD while the OHCI host controller is working on a transfer. Move the `used` flag to `gtd_extra_data_t`. This data is only used by the CPU, and the OHCI controller never accesses it. The existing allocation method for TDs does *not* put an empty TD onto each ED (i.e., it does *not* do what is shown in Figure 5-6 of the OHCI specification). Instead, the NextTD field of the last TD is set to 0. The TailP field of the ED is also set to 0. This works in many cases. However, this implementation means that the CPU may end up trying to write to the NextTD field of an in-progress transfer while the OHCI host controller logically owns it. Change the implementation to use an empty TD, as suggested by the specification, for endpoints other than EP0. This avoids the above issue. It is not necessary to make the change for EP0 because at most one TD can ever be pending on it at a time. The above change should also remove the need for the stall workaround. In the future, we want to modify the code to access EDs through an uncached mapping. Because uncached mappings are slow, we want to access EDs as little as possible. Currently, when a TD completes, we access an ED in order to figure out the device address and endpoint number of the TD which was completed. Because moving `used` to `gtd_extra_data_t` necessitates expanding it, we have enough room to also store the device address and endpoint number of the TD. This patch does so. With the above two changes, we no longer need to access an ED when a TD completes. Also remove the `index` field from TDs as it is no longer necessary. 
--- src/portable/ohci/ohci.c | 82 +++++++++++++++------------------------- src/portable/ohci/ohci.h | 8 ++-- 2 files changed, 35 insertions(+), 55 deletions(-) diff --git a/src/portable/ohci/ohci.c b/src/portable/ohci/ohci.c index 9450b8d0e2..b297173bc8 100644 --- a/src/portable/ohci/ohci.c +++ b/src/portable/ohci/ohci.c @@ -355,7 +355,6 @@ static void ed_init(ohci_ed_t *p_ed, uint8_t dev_addr, uint16_t ep_size, uint8_t static void gtd_init(ohci_gtd_t *p_td, uint8_t *data_ptr, uint16_t total_bytes) { tu_memclr(p_td, sizeof(ohci_gtd_t)); - p_td->used = 1; gtd_get_extra_data(p_td)->expected_bytes = total_bytes; p_td->buffer_rounding = 1; // less than queued length is not a error @@ -439,25 +438,15 @@ static ohci_gtd_t * gtd_find_free(void) { for(uint8_t i=0; i < GTD_MAX; i++) { - if ( !ohci_data.gtd_pool[i].used ) return &ohci_data.gtd_pool[i]; + if ( !ohci_data.gtd_extra[i].used ) { + ohci_data.gtd_extra[i].used = 1; + return &ohci_data.gtd_pool[i]; + } } return NULL; } -static void td_insert_to_ed(ohci_ed_t* p_ed, ohci_gtd_t * p_gtd) -{ - // tail is always NULL - if ( tu_align16(p_ed->td_head.address) == 0 ) - { // TD queue is empty --> head = TD - p_ed->td_head.address |= (uint32_t) _phys_addr(p_gtd); - } - else - { // TODO currently only support queue up to 2 TD each endpoint at a time - ((ohci_gtd_t*) tu_align16((uint32_t)_virt_addr((void *)p_ed->td_head.address)))->next = (uint32_t) _phys_addr(p_gtd); - } -} - //--------------------------------------------------------------------+ // Endpoint API //--------------------------------------------------------------------+ @@ -490,6 +479,16 @@ bool hcd_edpt_open(uint8_t rhport, uint8_t dev_addr, tusb_desc_endpoint_t const return true; } + if ( tu_edpt_number(ep_desc->bEndpointAddress) != 0 ) { + // Get an empty TD and use it as the end-of-list marker. + // This marker TD will be used when a transfer is made on this EP + // (and a new, empty TD will be allocated for the next-next transfer). 
+ ohci_gtd_t* gtd = gtd_find_free(); + TU_ASSERT(gtd); + hcd_dcache_uncached(p_ed->td_head).address = (uint32_t)_phys_addr(gtd); + hcd_dcache_uncached(p_ed->td_tail) = (uint32_t)_phys_addr(gtd); + } + ed_list_insert( p_ed_head[ep_desc->bmAttributes.xfer], p_ed ); return true; @@ -508,7 +507,8 @@ bool hcd_setup_send(uint8_t rhport, uint8_t dev_addr, uint8_t const setup_packet ohci_gtd_t *qtd = &ohci_data.control[dev_addr].gtd; gtd_init(qtd, (uint8_t*)(uintptr_t) setup_packet, 8); - qtd->index = dev_addr; + gtd_get_extra_data(qtd)->dev_addr = dev_addr; + gtd_get_extra_data(qtd)->ep_addr = tu_edpt_addr(0, TUSB_DIR_OUT); qtd->pid = PID_SETUP; qtd->data_toggle = GTD_DT_DATA0; qtd->delay_interrupt = OHCI_INT_ON_COMPLETE_YES; @@ -534,8 +534,9 @@ bool hcd_edpt_xfer(uint8_t rhport, uint8_t dev_addr, uint8_t ep_addr, uint8_t * ohci_gtd_t* gtd = &ohci_data.control[dev_addr].gtd; gtd_init(gtd, buffer, buflen); + gtd_get_extra_data(gtd)->dev_addr = dev_addr; + gtd_get_extra_data(gtd)->ep_addr = ep_addr; - gtd->index = dev_addr; gtd->pid = dir ? 
PID_IN : PID_OUT; gtd->data_toggle = GTD_DT_DATA1; // Both Data and Ack stage start with DATA1 gtd->delay_interrupt = OHCI_INT_ON_COMPLETE_YES; @@ -546,15 +547,20 @@ bool hcd_edpt_xfer(uint8_t rhport, uint8_t dev_addr, uint8_t ep_addr, uint8_t * }else { ohci_ed_t * ed = ed_from_addr(dev_addr, ep_addr); - ohci_gtd_t* gtd = gtd_find_free(); - - TU_ASSERT(gtd); + ohci_gtd_t *gtd = (ohci_gtd_t *)_virt_addr((void *)hcd_dcache_uncached(ed->td_tail)); gtd_init(gtd, buffer, buflen); - gtd->index = ed-ohci_data.ed_pool; + gtd_get_extra_data(gtd)->dev_addr = dev_addr; + gtd_get_extra_data(gtd)->ep_addr = ep_addr; gtd->delay_interrupt = OHCI_INT_ON_COMPLETE_YES; - td_insert_to_ed(ed, gtd); + // Insert a new, empty TD at the tail, to be used by the next transfer + ohci_gtd_t* new_gtd = gtd_find_free(); + TU_ASSERT(new_gtd); + + gtd->next = (uint32_t)_phys_addr(new_gtd); + + hcd_dcache_uncached(ed->td_tail) = (uint32_t)_phys_addr(new_gtd); tusb_xfer_type_t xfer_type = ed_get_xfer_type( ed_from_addr(dev_addr, ep_addr) ); if (TUSB_XFER_BULK == xfer_type) OHCI_REG->command_status_bit.bulk_list_filled = 1; @@ -614,17 +620,6 @@ static inline bool gtd_is_control(ohci_gtd_t const * const p_qtd) return ((uint32_t) p_qtd) < ((uint32_t) ohci_data.gtd_pool); // check ohci_data_t for memory layout } -static inline ohci_ed_t* gtd_get_ed(ohci_gtd_t const * const p_qtd) -{ - if ( gtd_is_control(p_qtd) ) - { - return &ohci_data.control[p_qtd->index].ed; - }else - { - return &ohci_data.ed_pool[p_qtd->index]; - } -} - static gtd_extra_data_t *gtd_get_extra_data(ohci_gtd_t const * const gtd) { if ( gtd_is_control(gtd) ) { uint8_t idx = ((uintptr_t)gtd - (uintptr_t)&ohci_data.control->gtd) / sizeof(ohci_data.control[0]); @@ -661,29 +656,12 @@ static void done_queue_isr(uint8_t hostid) xfer_result_t const event = (qtd->condition_code == OHCI_CCODE_NO_ERROR) ? XFER_RESULT_SUCCESS : (qtd->condition_code == OHCI_CCODE_STALL) ? 
XFER_RESULT_STALLED : XFER_RESULT_FAILED; - qtd->used = 0; // free TD + gtd_get_extra_data(qtd)->used = 0; // free TD if ( (qtd->delay_interrupt == OHCI_INT_ON_COMPLETE_YES) || (event != XFER_RESULT_SUCCESS) ) { - ohci_ed_t * const ed = gtd_get_ed(qtd); uint32_t const xferred_bytes = gtd_get_extra_data(qtd)->expected_bytes - gtd_xfer_byte_left((uint32_t) qtd->buffer_end, (uint32_t) qtd->current_buffer_pointer); - // NOTE Assuming the current list is BULK and there is no other EDs in the list has queued TDs. - // When there is a error resulting this ED is halted, and this EP still has other queued TD - // --> the Bulk list only has this halted EP queueing TDs (remaining) - // --> Bulk list will be considered as not empty by HC !!! while there is no attempt transaction on this list - // --> HC will not process Control list (due to service ratio when Bulk list not empty) - // To walk-around this, the halted ED will have TailP = HeadP (empty list condition), when clearing halt - // the TailP must be set back to NULL for processing remaining TDs - if (event != XFER_RESULT_SUCCESS) - { - ed->td_tail &= 0x0Ful; - ed->td_tail |= tu_align16(ed->td_head.address); // mark halted EP as empty queue - if ( event == XFER_RESULT_STALLED ) ed->is_stalled = 1; - } - - uint8_t dir = (ed->ep_number == 0) ? (qtd->pid == PID_IN) : (ed->pid == PID_IN); - - hcd_event_xfer_complete(ed->dev_addr, tu_edpt_addr(ed->ep_number, dir), xferred_bytes, event, true); + hcd_event_xfer_complete(gtd_get_extra_data(qtd)->dev_addr, gtd_get_extra_data(qtd)->ep_addr, xferred_bytes, event, true); } td_head = (ohci_td_item_t*) _virt_addr((void *)td_head->next); diff --git a/src/portable/ohci/ohci.h b/src/portable/ohci/ohci.h index 12c411764c..8483686fae 100644 --- a/src/portable/ohci/ohci.h +++ b/src/portable/ohci/ohci.h @@ -97,9 +97,7 @@ typedef struct TU_ATTR_ALIGNED(16) { typedef struct TU_ATTR_ALIGNED(CFG_TUH_MEM_DCACHE_ENABLE ? 
CFG_TUH_MEM_DCACHE_LINE_SIZE : 16) { // Word 0 - uint32_t used : 1; - uint32_t index : 8; // endpoint index the gtd belongs to, or device address in case of control xfer - uint32_t : 9; // can be used + uint32_t : 18; // can be used uint32_t buffer_rounding : 1; uint32_t pid : 2; uint32_t delay_interrupt : 3; @@ -181,7 +179,11 @@ TU_VERIFY_STATIC( sizeof(ochi_itd_t) == CFG_TUH_MEM_DCACHE_ENABLE ? CFG_TUH_MEM_ typedef struct { uint16_t expected_bytes; // up to 8192 bytes so max is 13 bits + uint8_t dev_addr : 7; + uint8_t used : 1; + uint8_t ep_addr; } gtd_extra_data_t; +TU_VERIFY_STATIC( sizeof(gtd_extra_data_t) == 4, "size is not correct" ); // structure with member alignment required from large to small typedef struct TU_ATTR_ALIGNED(256) { From 915d21241c265692a5bfc1cb1d96af4521eca536 Mon Sep 17 00:00:00 2001 From: R Date: Sat, 13 Sep 2025 00:18:52 +0100 Subject: [PATCH 4/5] ohci: Perform explicit cache operations on TDs, buffers --- src/portable/ohci/ohci.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/src/portable/ohci/ohci.c b/src/portable/ohci/ohci.c index b297173bc8..c5b1d78e6a 100644 --- a/src/portable/ohci/ohci.c +++ b/src/portable/ohci/ohci.c @@ -225,6 +225,8 @@ bool hcd_init(uint8_t rhport, const tusb_rhport_init_t* rh_init) { #endif } + hcd_dcache_clean(&ohci_data, sizeof(ohci_data)); + // reset controller OHCI_REG->command_status_bit.controller_reset = 1; while( OHCI_REG->command_status_bit.controller_reset ) {} // should not take longer than 10 us @@ -506,12 +508,15 @@ bool hcd_setup_send(uint8_t rhport, uint8_t dev_addr, uint8_t const setup_packet ohci_ed_t* ed = &ohci_data.control[dev_addr].ed; ohci_gtd_t *qtd = &ohci_data.control[dev_addr].gtd; + hcd_dcache_clean(setup_packet, 8); + gtd_init(qtd, (uint8_t*)(uintptr_t) setup_packet, 8); gtd_get_extra_data(qtd)->dev_addr = dev_addr; gtd_get_extra_data(qtd)->ep_addr = tu_edpt_addr(0, TUSB_DIR_OUT); qtd->pid = PID_SETUP; qtd->data_toggle = GTD_DT_DATA0; qtd->delay_interrupt = 
OHCI_INT_ON_COMPLETE_YES; + hcd_dcache_clean(qtd, sizeof(ohci_gtd_t)); //------------- Attach TDs list to Control Endpoint -------------// ed->td_head.address = (uint32_t) _phys_addr(qtd); @@ -528,6 +533,13 @@ bool hcd_edpt_xfer(uint8_t rhport, uint8_t dev_addr, uint8_t ep_addr, uint8_t * uint8_t const epnum = tu_edpt_number(ep_addr); uint8_t const dir = tu_edpt_dir(ep_addr); + // IN transfer: invalidate buffer, OUT transfer: clean buffer + if (dir) { + hcd_dcache_invalidate(buffer, buflen); + } else { + hcd_dcache_clean(buffer, buflen); + } + if ( epnum == 0 ) { ohci_ed_t* ed = &ohci_data.control[dev_addr].ed; @@ -540,6 +552,7 @@ bool hcd_edpt_xfer(uint8_t rhport, uint8_t dev_addr, uint8_t ep_addr, uint8_t * gtd->pid = dir ? PID_IN : PID_OUT; gtd->data_toggle = GTD_DT_DATA1; // Both Data and Ack stage start with DATA1 gtd->delay_interrupt = OHCI_INT_ON_COMPLETE_YES; + hcd_dcache_clean(gtd, sizeof(ohci_gtd_t)); ed->td_head.address = (uint32_t) _phys_addr(gtd); @@ -559,6 +572,7 @@ bool hcd_edpt_xfer(uint8_t rhport, uint8_t dev_addr, uint8_t ep_addr, uint8_t * TU_ASSERT(new_gtd); gtd->next = (uint32_t)_phys_addr(new_gtd); + hcd_dcache_clean(gtd, sizeof(ohci_gtd_t)); hcd_dcache_uncached(ed->td_tail) = (uint32_t)_phys_addr(new_gtd); @@ -603,6 +617,12 @@ static ohci_td_item_t* list_reverse(ohci_td_item_t* td_head) while(td_head != NULL) { td_head = _virt_addr(td_head); + // FIXME: This is not the correct object size. + // However, because we have hardcoded the assumption that + // a cache line is at least 32 bytes (in ohci.h), and + // because both types of TD structs are <= 32 bytes, this + // nonetheless still works without error. 
+ hcd_dcache_invalidate(td_head, sizeof(ohci_td_item_t)); uint32_t next = td_head->next; // make current's item become reverse's first item From b9378eb8e79691907aae40b3fc26cb57d4ffaeb4 Mon Sep 17 00:00:00 2001 From: R Date: Sat, 13 Sep 2025 00:41:15 +0100 Subject: [PATCH 5/5] ohci: Use uncached alias to access EDs This code is written very carefully to always use an uncached view of memory to read/write EDs. An uncached view must *always* be used, or else cache behavior can corrupt the ED. As part of this change, combine access into as few word-sized accesses as possible. This makes the code perform better. Doing this involves giving type names to the bitfields that make up the ED's data words. --- src/portable/ohci/ohci.c | 95 +++++++++++++++++++++------------------- src/portable/ohci/ohci.h | 48 +++++++++++--------- 2 files changed, 79 insertions(+), 64 deletions(-) diff --git a/src/portable/ohci/ohci.c b/src/portable/ohci/ohci.c index c5b1d78e6a..a9246e1cdc 100644 --- a/src/portable/ohci/ohci.c +++ b/src/portable/ohci/ohci.c @@ -199,9 +199,9 @@ bool hcd_init(uint8_t rhport, const tusb_rhport_init_t* rh_init) { ohci_data.hcca.interrupt_table[i] = (uint32_t) _phys_addr(&ohci_data.period_head_ed); } - ohci_data.control[0].ed.skip = 1; - ohci_data.bulk_head_ed.skip = 1; - ohci_data.period_head_ed.skip = 1; + ohci_data.control[0].ed.w0.skip = 1; + ohci_data.bulk_head_ed.w0.skip = 1; + ohci_data.period_head_ed.w0.skip = 1; //If OHCI hardware is in SMM mode, gain ownership (Ref OHCI spec 5.1.1.3.3) if (OHCI_REG->control_bit.interrupt_routing == 1) @@ -300,7 +300,7 @@ void hcd_device_close(uint8_t rhport, uint8_t dev_addr) // addr0 serves as static head --> only set skip bit if ( dev_addr == 0 ) { - ohci_data.control[0].ed.skip = 1; + hcd_dcache_uncached(ohci_data.control[0].ed.w0).skip = 1; }else { // remove control @@ -323,11 +323,11 @@ void hcd_device_close(uint8_t rhport, uint8_t dev_addr) //--------------------------------------------------------------------+ // 
List Helper //--------------------------------------------------------------------+ -static inline tusb_xfer_type_t ed_get_xfer_type(ohci_ed_t const * const p_ed) +static inline tusb_xfer_type_t ed_get_xfer_type(ohci_ed_word0 w0) { - return (p_ed->ep_number == 0 ) ? TUSB_XFER_CONTROL : - (p_ed->is_iso ) ? TUSB_XFER_ISOCHRONOUS : - (p_ed->is_interrupt_xfer) ? TUSB_XFER_INTERRUPT : TUSB_XFER_BULK; + return (w0.ep_number == 0 ) ? TUSB_XFER_CONTROL : + (w0.is_iso ) ? TUSB_XFER_ISOCHRONOUS : + (w0.is_interrupt_xfer) ? TUSB_XFER_INTERRUPT : TUSB_XFER_BULK; } static void ed_init(ohci_ed_t *p_ed, uint8_t dev_addr, uint16_t ep_size, uint8_t ep_addr, uint8_t xfer_type, uint8_t interval) @@ -337,21 +337,25 @@ static void ed_init(ohci_ed_t *p_ed, uint8_t dev_addr, uint16_t ep_size, uint8_t // address 0 is used as async head, which always on the list --> cannot be cleared if (dev_addr != 0) { - tu_memclr(p_ed, sizeof(ohci_ed_t)); + hcd_dcache_uncached(p_ed->td_tail) = 0; + hcd_dcache_uncached(p_ed->td_head).address = 0; + hcd_dcache_uncached(p_ed->next) = 0; } tuh_bus_info_t bus_info; tuh_bus_info_get(dev_addr, &bus_info); - p_ed->dev_addr = dev_addr; - p_ed->ep_number = ep_addr & 0x0F; - p_ed->pid = (xfer_type == TUSB_XFER_CONTROL) ? PID_FROM_TD : (tu_edpt_dir(ep_addr) ? PID_IN : PID_OUT); - p_ed->speed = bus_info.speed; - p_ed->is_iso = (xfer_type == TUSB_XFER_ISOCHRONOUS) ? 1 : 0; - p_ed->max_packet_size = ep_size; - - p_ed->used = 1; - p_ed->is_interrupt_xfer = (xfer_type == TUSB_XFER_INTERRUPT ? 1 : 0); + ohci_ed_word0 w0 = {.u = 0}; + w0.dev_addr = dev_addr; + w0.ep_number = ep_addr & 0x0F; + w0.pid = (xfer_type == TUSB_XFER_CONTROL) ? PID_FROM_TD : (tu_edpt_dir(ep_addr) ? PID_IN : PID_OUT); + w0.speed = bus_info.speed; + w0.is_iso = (xfer_type == TUSB_XFER_ISOCHRONOUS) ? 1 : 0; + w0.max_packet_size = ep_size; + + w0.used = 1; + w0.is_interrupt_xfer = (xfer_type == TUSB_XFER_INTERRUPT ? 
1 : 0); + hcd_dcache_uncached(p_ed->w0) = w0; } static void gtd_init(ohci_gtd_t *p_td, uint8_t *data_ptr, uint16_t total_bytes) { @@ -381,8 +385,9 @@ static ohci_ed_t * ed_from_addr(uint8_t dev_addr, uint8_t ep_addr) for(uint32_t i=0; inext = p_pre->next; - p_pre->next = (uint32_t) _phys_addr(p_ed); + hcd_dcache_uncached(p_ed->next) = hcd_dcache_uncached(p_pre->next); + hcd_dcache_uncached(p_pre->next) = (uint32_t) _phys_addr(p_ed); } static void ed_list_remove_by_addr(ohci_ed_t * p_head, uint8_t dev_addr) { ohci_ed_t* p_prev = p_head; - while( p_prev->next ) + uint32_t ed_pa; + while( (ed_pa = hcd_dcache_uncached(p_prev->next)) ) { - ohci_ed_t* ed = (ohci_ed_t*) _virt_addr((void *)p_prev->next); + ohci_ed_t* ed = (ohci_ed_t*) _virt_addr((void *)ed_pa); - if (ed->dev_addr == dev_addr) + if (hcd_dcache_uncached(ed->w0).dev_addr == dev_addr) { // Prevent Host Controller from processing this ED while we remove it - ed->skip = 1; + hcd_dcache_uncached(ed->w0).skip = 1; // unlink ed, will also move up p_prev - p_prev->next = ed->next; + hcd_dcache_uncached(p_prev->next) = hcd_dcache_uncached(ed->next); // point the removed ED's next pointer to list head to make sure HC can always safely move away from this ED - ed->next = (uint32_t) _phys_addr(p_head); - ed->used = 0; - ed->skip = 0; + hcd_dcache_uncached(ed->next) = (uint32_t) _phys_addr(p_head); + ohci_ed_word0 w0 = hcd_dcache_uncached(ed->w0); + w0.used = 0; + w0.skip = 0; + hcd_dcache_uncached(ed->w0) = w0; }else { - p_prev = (ohci_ed_t*) _virt_addr((void *)p_prev->next); + p_prev = (ohci_ed_t*) _virt_addr((void *)ed_pa); } } } @@ -477,7 +485,7 @@ bool hcd_edpt_open(uint8_t rhport, uint8_t dev_addr, tusb_desc_endpoint_t const // control of dev0 is used as static async head if ( dev_addr == 0 ) { - p_ed->skip = 0; // only need to clear skip bit + hcd_dcache_uncached(p_ed->w0).skip = 0; // only need to clear skip bit return true; } @@ -519,7 +527,7 @@ bool hcd_setup_send(uint8_t rhport, uint8_t dev_addr, uint8_t const 
setup_packet hcd_dcache_clean(qtd, sizeof(ohci_gtd_t)); //------------- Attach TDs list to Control Endpoint -------------// - ed->td_head.address = (uint32_t) _phys_addr(qtd); + hcd_dcache_uncached(ed->td_head.address) = (uint32_t) _phys_addr(qtd); OHCI_REG->command_status_bit.control_list_filled = 1; @@ -554,12 +562,13 @@ bool hcd_edpt_xfer(uint8_t rhport, uint8_t dev_addr, uint8_t ep_addr, uint8_t * gtd->delay_interrupt = OHCI_INT_ON_COMPLETE_YES; hcd_dcache_clean(gtd, sizeof(ohci_gtd_t)); - ed->td_head.address = (uint32_t) _phys_addr(gtd); + hcd_dcache_uncached(ed->td_head).address = (uint32_t) _phys_addr(gtd); OHCI_REG->command_status_bit.control_list_filled = 1; }else { ohci_ed_t * ed = ed_from_addr(dev_addr, ep_addr); + tusb_xfer_type_t xfer_type = ed_get_xfer_type( hcd_dcache_uncached(ed->w0) ); ohci_gtd_t *gtd = (ohci_gtd_t *)_virt_addr((void *)hcd_dcache_uncached(ed->td_tail)); gtd_init(gtd, buffer, buflen); @@ -576,7 +585,6 @@ bool hcd_edpt_xfer(uint8_t rhport, uint8_t dev_addr, uint8_t ep_addr, uint8_t * hcd_dcache_uncached(ed->td_tail) = (uint32_t)_phys_addr(new_gtd); - tusb_xfer_type_t xfer_type = ed_get_xfer_type( ed_from_addr(dev_addr, ep_addr) ); if (TUSB_XFER_BULK == xfer_type) OHCI_REG->command_status_bit.bulk_list_filled = 1; } @@ -595,13 +603,12 @@ bool hcd_edpt_clear_stall(uint8_t rhport, uint8_t dev_addr, uint8_t ep_addr) { (void) rhport; ohci_ed_t * const p_ed = ed_from_addr(dev_addr, ep_addr); - p_ed->is_stalled = 0; - p_ed->td_tail &= 0x0Ful; // set tail pointer back to NULL - - p_ed->td_head.toggle = 0; // reset data toggle - p_ed->td_head.halted = 0; + ohci_ed_td_head td_head = hcd_dcache_uncached(p_ed->td_head); + td_head.toggle = 0; // reset data toggle + td_head.halted = 0; + hcd_dcache_uncached(p_ed->td_head) = td_head; - if ( TUSB_XFER_BULK == ed_get_xfer_type(p_ed) ) OHCI_REG->command_status_bit.bulk_list_filled = 1; + if ( TUSB_XFER_BULK == ed_get_xfer_type(hcd_dcache_uncached(p_ed->w0)) ) 
OHCI_REG->command_status_bit.bulk_list_filled = 1; return true; } @@ -665,8 +672,8 @@ static void done_queue_isr(uint8_t hostid) (void) hostid; // done head is written in reversed order of completion --> need to reverse the done queue first - ohci_td_item_t* td_head = list_reverse ( (ohci_td_item_t*) tu_align16(ohci_data.hcca.done_head) ); - ohci_data.hcca.done_head = 0; + ohci_td_item_t* td_head = list_reverse ( (ohci_td_item_t*) tu_align16(hcd_dcache_uncached(ohci_data.hcca).done_head) ); + hcd_dcache_uncached(ohci_data.hcca).done_head = 0; while( td_head != NULL ) { diff --git a/src/portable/ohci/ohci.h b/src/portable/ohci/ohci.h index 8483686fae..78cac664ea 100644 --- a/src/portable/ohci/ohci.h +++ b/src/portable/ohci/ohci.h @@ -117,34 +117,42 @@ typedef struct TU_ATTR_ALIGNED(CFG_TUH_MEM_DCACHE_ENABLE ? CFG_TUH_MEM_DCACHE_LI TU_VERIFY_STATIC( sizeof(ohci_gtd_t) == CFG_TUH_MEM_DCACHE_ENABLE ? CFG_TUH_MEM_DCACHE_LINE_SIZE : 16, "size is not correct" ); +typedef union { + struct { + uint32_t dev_addr : 7; + uint32_t ep_number : 4; + uint32_t pid : 2; + uint32_t speed : 1; + uint32_t skip : 1; + uint32_t is_iso : 1; + uint32_t max_packet_size : 11; + // HCD: make use of 5 reserved bits + uint32_t used : 1; + uint32_t is_interrupt_xfer : 1; + uint32_t : 3; + }; + uint32_t u; +} ohci_ed_word0; + +typedef union { + uint32_t address; + struct { + uint32_t halted : 1; + uint32_t toggle : 1; + uint32_t : 30; + }; +} ohci_ed_td_head; + typedef struct TU_ATTR_ALIGNED(16) { // Word 0 - uint32_t dev_addr : 7; - uint32_t ep_number : 4; - uint32_t pid : 2; - uint32_t speed : 1; - uint32_t skip : 1; - uint32_t is_iso : 1; - uint32_t max_packet_size : 11; - // HCD: make use of 5 reserved bits - uint32_t used : 1; - uint32_t is_interrupt_xfer : 1; - uint32_t is_stalled : 1; - uint32_t : 2; + ohci_ed_word0 w0; // Word 1 uint32_t td_tail; // Word 2 - volatile union { - uint32_t address; - struct { - uint32_t halted : 1; - uint32_t toggle : 1; - uint32_t : 30; - }; - }td_head; + 
volatile ohci_ed_td_head td_head; // Word 3: next ED uint32_t next;