Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fully tested for aarch64 OS without a bus error #289

Open
wants to merge 3 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 13 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,9 +1,13 @@
cmake_minimum_required(VERSION 2.8)

project(fbcp-ili9341)

# Default to an optimized build when no build type was requested on the command line.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE "Release" CACHE STRING "Choose the type of build, options are: Debug Release RelWithDebInfo MinSizeRel." FORCE)
endif()

# Detect the machine architecture (e.g. "aarch64") so that architecture-specific compiler
# flags can be selected further below.
# OUTPUT_STRIP_TRAILING_WHITESPACE removes the trailing newline of `uname -m`. The previous
# `tr -d '\n'` pipeline only worked by accident: CMake has no single-quote quoting, so the
# apostrophes were passed to tr as part of the character set to delete.
execute_process(
  COMMAND uname -m
  OUTPUT_VARIABLE ARCHITECTURE
  OUTPUT_STRIP_TRAILING_WHITESPACE
)

# If uname is unavailable or printed nothing, fall back to CMake's own notion of the target
# processor so that ARCHITECTURE is never empty in later comparisons.
if("${ARCHITECTURE}" STREQUAL "")
  set(ARCHITECTURE "${CMAKE_SYSTEM_PROCESSOR}")
endif()
message(STATUS "Architecture: ${ARCHITECTURE}")

include_directories(/opt/vc/include)
link_directories(/opt/vc/lib)

Expand Down Expand Up @@ -46,7 +50,11 @@ if (SINGLE_CORE_BOARD)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSINGLE_CORE_BOARD=1")
endif()

set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -marm -mabi=aapcs-linux -mhard-float -mfloat-abi=hard -mlittle-endian -mtls-dialect=gnu2 -funsafe-math-optimizations")
# Select the code generation flags for the detected architecture.
# The variable expansion is quoted so that an empty ARCHITECTURE compares as an empty string
# instead of collapsing to `if (NOT STREQUAL "aarch64")`, which is a configure-time error.
if (NOT "${ARCHITECTURE}" STREQUAL "aarch64")
  # 32-bit ARM build: ARM instruction set, hard-float AAPCS ABI.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -marm -mabi=aapcs-linux -mhard-float -mfloat-abi=hard -mlittle-endian -mtls-dialect=gnu2 -funsafe-math-optimizations")
else()
  # 64-bit ARM build: the 32-bit ARM ABI flags above are left out, and -mstrict-align asks the
  # compiler not to assume that unaligned memory accesses are handled by the system.
  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mlittle-endian -funsafe-math-optimizations -mstrict-align")
endif()

option(ARMV6Z "Target a Raspberry Pi with ARMv6Z instruction set (Pi 1A, 1A+, 1B, 1B+, Zero, Zero W)" ${DEFAULT_TO_ARMV6Z})
if (ARMV6Z)
Expand Down Expand Up @@ -220,6 +228,9 @@ elseif(KEDEI_V63_MPI3501)
if (USE_DMA_TRANSFERS)
message(FATAL_ERROR "DMA is unfortunately not possible with KeDei MPI3501. Please disable with -DUSE_DMA_TRANSFERS=OFF.")
endif()
elseif(KEDEI_TRASH)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DKEDEI_TRASH")
message(STATUS "Targeting KeDei 3.5 inch SPI TFTLCD 480*320 16bit/18bit version 3.0 2015/4/9 display")
elseif(ILI9341)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DILI9341")
message(STATUS "Targeting ILI9341")
Expand Down Expand Up @@ -271,4 +282,4 @@ endif()

add_executable(fbcp-ili9341 ${sourceFiles})

target_link_libraries(fbcp-ili9341 pthread bcm_host atomic)
target_link_libraries(fbcp-ili9341 pthread bcm_host atomic bcm2835)
2 changes: 2 additions & 0 deletions display.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

#if defined(ILI9341) || defined(ILI9340)
#include "ili9341.h"
#elif defined(KEDEI_TRASH)
#include "kedei_trash.h"
#elif defined(ILI9486L)
#include "ili9486l.h"
#elif defined(ILI9488)
Expand Down
35 changes: 30 additions & 5 deletions spi.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,16 +41,19 @@ static uint32_t writeCounter = 0;
TOGGLE_CHIP_SELECT_LINE(); \
DEBUG_PRINT_WRITTEN_BYTE(w); \
} while(0)

// File descriptor, initialized to -1; presumably the handle of the /dev/mem mapping opened in InitSPI() — TODO confirm.
int mem_fd = -1;
// Base address of the memory-mapped BCM2835 peripheral region. InitSPI() compares it against MAP_FAILED
// and derives the register pointers below from it.
volatile void *bcm2835 = 0;
// GPIO register file, set by InitSPI() to bcm2835 + BCM2835_GPIO_BASE.
volatile GPIORegisterFile *gpio = 0;
// SPI0 register file, set by InitSPI() to bcm2835 + BCM2835_SPI0_BASE.
volatile SPIRegisterFile *spi = 0;

// Points to the system timer register (bcm2835 + BCM2835_TIMER_BASE + 0x04). N.B. the spec sheet says this register is split into
// low and high parts, at a 32-bit aligned (but not 64-bit aligned) address. Profiling shows that the Pi 3 Model B does allow reading
// it with a single u64 load, and even though the load is unaligned, that is around 30% faster than loading in parts "lo | (hi << 32)".
// On aarch64 the pointer is declared as 32-bit instead, so that tick() reads the two halves separately; a single 64-bit load at this
// address was reported to raise a bus error on a 64-bit OS.
#if __aarch64__
volatile uint32_t *systemTimerRegister = 0;
#else
volatile uint64_t *systemTimerRegister = 0;

#endif
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if instead of the above, it would work to do

#if __aarch64__
typedef uint64_t __attribute__((aligned(4))) timer_uint64_t;
#else
typedef uint64_t timer_uint64_t;
#endif

volatile timer_uint64_t *systemTimerRegister = 0;

This will tell the compiler that systemTimerRegister points to something that is only 32-bit aligned, and it should then automatically generate the appropriate 32-bit loads and stores on its own.

After that, the other #ifdef __aarch64__s would not be necessary below?

Copy link
Author

@meesokim meesokim Jan 31, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've applied your patch on the following 64bit OS.
"Linux raspberrypi 5.15.84-v8+ #1613 SMP PREEMPT Thu Jan 5 12:03:08 GMT 2023 aarch64 GNU/Linux"

But it produces the same bus error.

bcm_host_get_peripheral_address: 0x3f000000, bcm_host_get_peripheral_size: 16777216, bcm_host_get_sdram_address: 0xc0000000
BCM core speed: current: 400000000hz, max turbo: 400000000hz. SPI CDIV: 6, SPI max frequency: 66666667hz
Initializing display
bcm2835 library version: 10071 (0x00002757)
Creating SPI task thread
InitSPI done
Bus error

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Oh, bummer. That would have been sweet if that had worked.

void DumpSPICS(uint32_t reg)
{
PRINT_FLAG(BCM2835_SPI0_CS_CS);
Expand Down Expand Up @@ -319,7 +322,6 @@ void RunSPITask(SPITask *task)

SET_GPIO(GPIO_TFT_DATA_CONTROL);
#endif

// Send the data payload:
while(tStart < tPrefillEnd) WRITE_FIFO(*tStart++);
while(tStart < tEnd)
Expand All @@ -335,6 +337,11 @@ void RunSPITask(SPITask *task)
}
#else

#ifdef KEDEI_TRASH
extern void lcd_data8(uint8_t *data);
extern void lcd_cmd(uint8_t data);
#endif

void RunSPITask(SPITask *task)
{
WaitForPolledSPITransferToFinish();
Expand All @@ -352,6 +359,15 @@ void RunSPITask(SPITask *task)
const uint32_t payloadSize = tEnd - tStart;
uint8_t *tPrefillEnd = tStart + MIN(15, payloadSize);

#ifdef KEDEI_TRASH
lcd_cmd(task->cmd);
while(tStart < tEnd)
{
lcd_data8(tStart);
tStart += 2;
}
#else // not KEDEI_TRASH

// Send the command word if display is 4-wire (3-wire displays can omit this, commands are interleaved in the data payload stream above)
#ifndef SPI_3WIRE_PROTOCOL
// An SPI transfer to the display always starts with one control (command) byte, followed by N data bytes.
Expand All @@ -362,7 +378,6 @@ void RunSPITask(SPITask *task)
WRITE_FIFO(0x00);
#endif
WRITE_FIFO(task->cmd);

#ifdef DISPLAY_SPI_BUS_IS_16BITS_WIDE
while(!(spi->cs & (BCM2835_SPI0_CS_DONE))) /*nop*/;
spi->fifo;
Expand Down Expand Up @@ -390,7 +405,11 @@ void RunSPITask(SPITask *task)
else
#endif
{
while(tStart < tPrefillEnd) WRITE_FIFO(*tStart++);
while(tStart < tPrefillEnd)
{
WRITE_FIFO(*tStart++);

}
while(tStart < tEnd)
{
uint32_t cs = spi->cs;
Expand All @@ -399,10 +418,12 @@ void RunSPITask(SPITask *task)
if ((cs & (BCM2835_SPI0_CS_RXR|BCM2835_SPI0_CS_RXF))) spi->cs = BCM2835_SPI0_CS_CLEAR_RX | BCM2835_SPI0_CS_TA | DISPLAY_SPI_DRIVE_SETTINGS;
}
}
#endif // not KEDEI_TRASH

#ifdef DISPLAY_NEEDS_CHIP_SELECT_SIGNAL
END_SPI_COMMUNICATION();
#endif

}
#endif

Expand Down Expand Up @@ -515,7 +536,11 @@ int InitSPI()
if (bcm2835 == MAP_FAILED) FATAL_ERROR("mapping /dev/mem failed");
spi = (volatile SPIRegisterFile*)((uintptr_t)bcm2835 + BCM2835_SPI0_BASE);
gpio = (volatile GPIORegisterFile*)((uintptr_t)bcm2835 + BCM2835_GPIO_BASE);
#if __aarch64__
systemTimerRegister = (volatile uint32_t*)((uintptr_t)bcm2835 + BCM2835_TIMER_BASE + 0x04); // Generates an unaligned 64-bit pointer, but seems to be fine.
#else
systemTimerRegister = (volatile uint64_t*)((uintptr_t)bcm2835 + BCM2835_TIMER_BASE + 0x04); // Generates an unaligned 64-bit pointer, but seems to be fine.
#endif
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe

// On 32-bit Pi, we can perform an unaligned 64-bit pointer access to read the timer for a micro-optimization, but on 64-bit Pi reading in 32-bit parts is needed.
#if __aarch64__
  systemTimerRegister = (volatile uint32_t*)
#else
  systemTimerRegister = (volatile uint64_t*)
#endif
    ((uintptr_t)bcm2835 + BCM2835_TIMER_BASE + 0x04);

?

// TODO: On graceful shutdown, (ctrl-c signal?) close(mem_fd)
#endif

Expand Down
3 changes: 2 additions & 1 deletion spi.h
Original file line number Diff line number Diff line change
Expand Up @@ -112,12 +112,12 @@ typedef struct __attribute__((packed)) SPITask
#else
uint8_t cmd;
#endif
uint32_t dmaSpiHeader;
#ifdef OFFLOAD_PIXEL_COPY_TO_DMA_CPP
uint8_t *fb;
uint8_t *prevFb;
uint16_t width;
#endif
uint32_t dmaSpiHeader;
uint8_t data[]; // Contains both 8-bit and 9-bit tasks back to back, 8-bit first, then 9-bit.

#ifdef SPI_3WIRE_PROTOCOL
Expand All @@ -130,6 +130,7 @@ typedef struct __attribute__((packed)) SPITask
inline uint8_t *PayloadEnd() { return data + size; }
inline uint32_t PayloadSize() const { return size; }
inline uint32_t *DmaSpiHeaderAddress() { return &dmaSpiHeader; }
// inline uint32_t *DmaSpiHeaderAddress() { return 0; }
#endif

} SPITask;
Expand Down
7 changes: 5 additions & 2 deletions tick.h
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
#include <unistd.h>

// Initialized in spi.cpp along with the rest of the BCM2835 peripheral:
#if __aarch64__
extern volatile uint32_t *systemTimerRegister;

// Reads the 64-bit system timer counter as two 32-bit words (systemTimerRegister[0] = low word,
// systemTimerRegister[1] = high word) and returns the combined value.
// The two halves cannot be read atomically, so the high word is sampled before and after the low
// word: if it changed, the low word wrapped around between the loads and both halves are taken
// again from after the wrap. Without this guard the result can be off by 2^32 ticks whenever the
// low word rolls over in the middle of a read.
static inline uint64_t ReadSystemTimerInParts()
{
  uint32_t hi = systemTimerRegister[1];
  uint32_t lo = systemTimerRegister[0];
  const uint32_t hiAfter = systemTimerRegister[1];
  if (hiAfter != hi)
  {
    // Low word wrapped between the two high-word reads: use the newer high word with a fresh low word.
    hi = hiAfter;
    lo = systemTimerRegister[0];
  }
  return ((uint64_t)hi << 32) | lo;
}
#define tick() ReadSystemTimerInParts()
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe

#define tick() (systemTimerRegister[0] | ((uint64_t)systemTimerRegister[1] << 32))

?

#else
extern volatile uint64_t *systemTimerRegister;
#define tick() (*systemTimerRegister)

#endif

#endif

#ifdef NO_THROTTLING
#define usleep(x) ((void)0)
Expand Down