diff --git a/Firmware/CoverUI/YardForce/CortexMCycleCounter.cpp b/Firmware/CoverUI/YardForce/CortexMCycleCounter.cpp index 30d9f7c..afa18eb 100644 --- a/Firmware/CoverUI/YardForce/CortexMCycleCounter.cpp +++ b/Firmware/CoverUI/YardForce/CortexMCycleCounter.cpp @@ -1,3 +1,4 @@ +#include #include #include "include/CortexMCycleCounter.hpp" @@ -22,13 +23,21 @@ CortexMCycleCounter::CortexMCycleCounter() void CortexMCycleCounter::start() { +#ifdef STM32F030x8 // No DWT MCUs like STM32f0x + start_ = micros(); +#else // DWT capable MCU start_ = ARM_CM_DWT_CYCCNT; +#endif } void CortexMCycleCounter::stop() { +#ifdef STM32F030x8 // No DWT MCUs like STM32f0x + const uint32_t stop = micros(); +#else // DWT capable MCU const uint32_t stop = ARM_CM_DWT_CYCCNT; - +#endif + current_ = stop - start_; // Min/Max diff --git a/Firmware/CoverUI/YardForce/UC1698.cpp b/Firmware/CoverUI/YardForce/UC1698.cpp index ac8ad51..2a11e50 100644 --- a/Firmware/CoverUI/YardForce/UC1698.cpp +++ b/Firmware/CoverUI/YardForce/UC1698.cpp @@ -23,12 +23,11 @@ namespace yardforce { initConnection_(); - // Reset via RST control line - digitalWrite(PIN_CS, 0); // Chip select + CLR_CS; // Chip select digitalWrite(PIN_RST, 0); delay(1); digitalWrite(PIN_RST, 1); - digitalWrite(PIN_CS, 1); // Chip deselect + SET_CS; // Chip deselect delay(UC1698_RESET_DELAY_MS); // Wait for MTP read ready (after RST) // Check if it's an UC1698 @@ -39,7 +38,14 @@ namespace yardforce return false; } +#ifdef BENCHMARK + cycle_cnt_init_.start(); +#endif initDisplay_(); +#ifdef BENCHMARK + cycle_cnt_init_.stop(); +#endif + return true; } @@ -70,13 +76,13 @@ namespace yardforce { static uint8_t status[UC1698_GET_STATUS_SIZE]; - digitalWrite(PIN_CS, 0); // Chip select - digitalWrite(PIN_CD, 0); // Command + CLR_CS; // Chip select + CLR_CD; // Command for (unsigned int i = 0; i < UC1698_GET_STATUS_SIZE; i++) { status[i] = read_(); } - digitalWrite(PIN_CS, 1); // Chip deselect + SET_CS; // Chip deselect return status; } @@ -149,47 +155,47 @@ namespace yardforce // Write command void UC1698::writeCommand(uint8_t data) { - digitalWrite(PIN_CS, 0); // Chip select - digitalWrite(PIN_CD, 0); // Command + CLR_CS; // Chip select + CLR_CD; // Command writeSeq_(data); - digitalWrite(PIN_CS, 1); // Chip deselect + SET_CS; // Chip deselect } void UC1698::writeCommand(const uint8_t *data_array, unsigned int length) { - digitalWrite(PIN_CS, 0); // Chip select - digitalWrite(PIN_CD, 0); // Command - // while (length > 0) + CLR_CS; // Chip select + CLR_CD; // Command for (; length > 0; length--, data_array++) { writeSeq_(*data_array); } - digitalWrite(PIN_CS, 1); // Chip deselect + SET_CS; // Chip deselect } void UC1698::writeData(uint16_t data) { - digitalWrite(PIN_CS, 0); // Chip select - digitalWrite(PIN_CD, 1); // Data - + CLR_CS; // Chip select + SET_CD; // Data uint8_t part = (uint8_t)(data >> 8); writeSeq_(part); part = (uint8_t)data; writeSeq_(part); - digitalWrite(PIN_CS, 1); // Chip deselect + SET_CS; // Chip deselect } void UC1698::writeData(const uint8_t *data_array, unsigned int length) { - digitalWrite(PIN_CS, 0); // Chip select - digitalWrite(PIN_CD, 1); // Data + CLR_CS; // Chip select + SET_CD; // Data for (; length > 0; length--, data_array++) { writeSeq_(*data_array); + NOOP; + NOOP; // >=45ns. This NOOPs = 2*21ns + the next cycle. See Thpw80 of UC1698 datasheet page 60 } - digitalWrite(PIN_CS, 1); // Chip deselect + SET_CS; // Chip deselect } // Initialize control and data lines @@ -205,7 +211,7 @@ namespace yardforce digitalWrite(PIN_WR, 1); pinMode(PIN_CD, OUTPUT); digitalWrite(PIN_CD, 1); - GPIO_DATA_MODE_INPUT; + GPIO_DATA_MODE_OUTPUT; } // Initialize control and data lines @@ -219,22 +225,10 @@ namespace yardforce writeCommand(0b11010000 | 1); // [20] Set Color Pattern to R-G-B writeCommand(0b11010110); // [21] Set Color Mode to RRRRR-GGGGGG-BBBBB, 64k-color - fillScreen(false); // Clear Screen + // fillScreen(true); // Clear Screen setDisplayEnable(true); } - // NOP delay to fullfil ns timing requirements. Only useful for values > 100ns, otherwise use single NOOP's - void UC1698::nopDelay_(unsigned int t_ns) - { - int16_t i = t_ns / NOP_CYCLE_NS; - i -= 6; // Already consumed (or more) - - for (; i >= 0; i--) - { - NOOP; - } - } - // [2*] Read data from port uint8_t UC1698::read_() { @@ -245,6 +239,7 @@ namespace yardforce NOOP; uint8_t data = ((GPIO_TypeDef *)GPIO_DATA)->IDR & GPIO_DATA_MASK; digitalWrite(PIN_RD, 1); // Read end + GPIO_DATA_MODE_OUTPUT; return data; } @@ -252,12 +247,17 @@ namespace yardforce // Write data sequence void UC1698::writeSeq_(uint8_t data) { - GPIO_DATA_MODE_OUTPUT; +#ifdef BENCHMARK + cycle_cnt_writeSeq_.start(); +#endif - digitalWrite(PIN_WR, 0); // Trigger write + CLR_WR; // Trigger write ((GPIO_TypeDef *)GPIO_DATA)->ODR = (((GPIO_TypeDef *)GPIO_DATA)->ODR & ~GPIO_DATA_MASK) | data; - NOOP; // >=30ns. This NOOP = 21ns + the next cycle for the rising edge. See Tds80 of UC1698 datasheet page 60 - digitalWrite(PIN_WR, 1); // Write end + NOOP; // >=30ns. This NOOP = 21ns + the next cycle for the rising edge. See Tds80 of UC1698 datasheet page 60 + SET_WR; // Write end +#ifdef BENCHMARK + cycle_cnt_writeSeq_.stop(); +#endif } } // namespace controller } // namespace display diff --git a/Firmware/CoverUI/YardForce/include/CortexMCycleCounter.hpp b/Firmware/CoverUI/YardForce/include/CortexMCycleCounter.hpp index a93f233..8438d5b 100644 --- a/Firmware/CoverUI/YardForce/include/CortexMCycleCounter.hpp +++ b/Firmware/CoverUI/YardForce/include/CortexMCycleCounter.hpp @@ -24,13 +24,13 @@ class CortexMCycleCounter void stop(); // Stop measurement private: - unsigned int start_ = 0; - unsigned int current_ = 0; - unsigned int min_ = 0xffffffff; - unsigned int max_ = 0; - unsigned int avg_ = 0; - unsigned int sum_ = 0; - unsigned int cnt_ = 0; + unsigned long start_ = 0; + unsigned long current_ = 0; + unsigned long min_ = 0xffffffff; + unsigned long max_ = 0; + unsigned long avg_ = 0; + unsigned long sum_ = 0; + unsigned long cnt_ = 0; static bool s_dwt_initialized_; }; // CortexMCycleCounter diff --git a/Firmware/CoverUI/YardForce/include/UC1698.hpp b/Firmware/CoverUI/YardForce/include/UC1698.hpp index 6ce8805..8d90c73 100644 --- a/Firmware/CoverUI/YardForce/include/UC1698.hpp +++ b/Firmware/CoverUI/YardForce/include/UC1698.hpp @@ -15,21 +15,29 @@ #include -#define PIN_CS PC7 // Chip Select pin #define PIN_RST PA12 // ReSTart pin #define PIN_RD PC9 // Read control line -#define PIN_WR PC8 // Write control line -#define PIN_CD PA8 // Command/Data (A0) + +#define PIN_CS PC7 // Chip Select pin +#define SET_CS GPIOC->ODR = GPIOC->ODR | 0b10000000 // Set CS (PC7) = high +#define CLR_CS GPIOC->ODR = GPIOC->ODR & 0b11111111111111111111111101111111 // Clear CS (PC7) = low + +#define PIN_CD PA8 // Command/Data (A0) +#define SET_CD GPIOA->ODR = GPIOA->ODR | 0b100000000 // Set CD (PA8) = high +#define CLR_CD GPIOA->ODR = GPIOA->ODR & 0b11111111111111111111111011111111 // Clear CD (PA8) = low + +#define PIN_WR PC8 // Write control line. Only for Mode set +#define SET_WR GPIOC->ODR = GPIOC->ODR | 0b100000000 // Set WR (PC8) = high +#define CLR_WR GPIOC->ODR = GPIOC->ODR & 0b11111111111111111111111011111111 // Clear WR (PC8) = low #define GPIO_DATA GPIOB_BASE // GPIO port/base of the data lines. Assumed that D0-D7 are wired to ONE port! #define GPIO_DATA_MASK 0x00ff // Mask which forms the data. Assumed that D0-D7 are all connected consecutive via LSB 0 // Shorthands to set GPIO Mode data pins to input/output -#define GPIO_DATA_MODE_INPUT ((GPIO_TypeDef *)GPIO_DATA)->MODER = (((GPIO_TypeDef *)GPIO_DATA)->MODER & 0xffff0000) & 0xffff0000; // Mode 0 = Input -#define GPIO_DATA_MODE_OUTPUT ((GPIO_TypeDef *)GPIO_DATA)->MODER = (((GPIO_TypeDef *)GPIO_DATA)->MODER & 0xffff0000) | 0b0101010101010101 // Mode 01 = Output +#define GPIO_DATA_MODE_INPUT ((GPIO_TypeDef *)GPIO_DATA)->MODER = (((GPIO_TypeDef *)GPIO_DATA)->MODER & 0xffff0000) & 0xffff0000; // D0-D7 Mode 0 = Input +#define GPIO_DATA_MODE_OUTPUT ((GPIO_TypeDef *)GPIO_DATA)->MODER = (((GPIO_TypeDef *)GPIO_DATA)->MODER & 0xffff0000) | 0b0101010101010101 // D0-D7 Mode 01 = Output -#define NOP_CYCLE_NS 21 // For STM32F030x it's 1/48MHz which is 20.9ns (but we use int for not wasting time in slow FP calculations) -#define NOOP asm("nop\n") +#define NOOP asm("nop\n") // For STM32F030x it's 1/48MHz which is 20.9ns #define UC1698_RESET_DELAY_MS 150 #define UC1698_GET_STATUS_SIZE 3 @@ -41,6 +49,13 @@ #define UC1698_DISPLAY_HEIGHT 128 #endif +// Enable for benchmarking specific code +// #define BENCHMARK + +#ifdef BENCHMARK +#include "include/CortexMCycleCounter.hpp" +#endif + namespace yardforce { namespace display @@ -58,6 +73,9 @@ namespace yardforce void drawPixelTriplet(bool pixel1State, bool pixel2State, bool pixel3State); void fillScreen(bool t_color_black); // Fill screen (t_color: false = white, true = black) + void writeData(uint16_t data); + void writeData(const uint8_t *data_array, unsigned int length); + private: void initConnection_(); // Initialize control and data lines void initDisplay_(); // Initialize control and data lines @@ -72,13 +90,13 @@ namespace yardforce void writeCommand(uint8_t data); void writeCommand(const uint8_t *data_array, unsigned int length); - void writeData(uint16_t data); - void writeData(const uint8_t *data_array, unsigned int length); - - void nopDelay_(unsigned int t_ns); // NOP delay to fullfil ns timing requirements. Only useful for values > 100ns, otherwise use single NOOP's - uint8_t read_(); void writeSeq_(uint8_t data); + +#ifdef BENCHMARK + CortexMCycleCounter cycle_cnt_init_; + CortexMCycleCounter cycle_cnt_writeSeq_; +#endif }; } // namespace controller } // namespace display diff --git a/Firmware/CoverUI/YardForce/include/model/Display_SAxPRO.cpp b/Firmware/CoverUI/YardForce/include/model/Display_SAxPRO.cpp index 7a8fb76..f1b3f4a 100644 --- a/Firmware/CoverUI/YardForce/include/model/Display_SAxPRO.cpp +++ b/Firmware/CoverUI/YardForce/include/model/Display_SAxPRO.cpp @@ -60,20 +60,30 @@ namespace yardforce */ void DisplaySAXPRO::flush_cb(lv_disp_drv_t *disp_drv, const lv_area_t *area, lv_color_t *t_color_p) { - unsigned int x, y; - lv_color_t *color_p; +#ifdef BENCHMARK + cycle_cnt_flush_cb_.start(); +#endif + size_t cols16b = ((area->x2 - area->x1) + 1) / 3; // Num of 16 bit columns for this flush area + size_t cols8b = cols16b * 2; + uint8_t row_buffer[cols8b]; // Buffer for one row 2*8-bit (every 16 bit col holds 3 pixel) + uc1698.setWindowProgramArea(area->x1, area->x2, area->y1, area->y2); - for (y = area->y1; y <= area->y2; y++) + for (size_t y = area->y1; y <= area->y2; y++) { - for (x = area->x1; x <= area->x2; x += 3) // FIXME: Might overflow buffer if area->x2 is not dividable by 3 + for (size_t x = 0; x < cols8b; x += 2) // ATTENTION: Will only work with a correct rounder_cb()! { // Color is inverted (0 = black but pixel off / >0 = white but pixel on) but UC1698 "[16] Set Inverse Display" is set - uc1698.drawPixelTriplet(t_color_p->full, (t_color_p + 1)->full, (t_color_p + 2)->full); + row_buffer[x] = (0b11111000 * t_color_p->full | (0b00000111 * (t_color_p + 1)->full)); + row_buffer[x + 1] = (0b11100000 * (t_color_p + 1)->full) | (0b00011111 * (t_color_p + 2)->full); t_color_p += 3; } + uc1698.writeData(row_buffer, cols8b); } lv_disp_flush_ready(disp_drv); +#ifdef BENCHMARK + cycle_cnt_flush_cb_.stop(); +#endif } void DisplaySAXPRO::set_undocked() @@ -87,6 +97,9 @@ namespace yardforce void DisplaySAXPRO::mainScreen() { +#ifdef BENCHMARK + volatile auto perf_test = cycle_cnt_flush_cb_; +#endif // On the left side of the status bar we do have functional status symbols like heartbeat and ROS v_led_ros = new lvgl::WidgetLedSymbol(FA_SYMBOL_ROS, LV_ALIGN_TOP_LEFT, 0, 0); // Leftmost diff --git a/Firmware/CoverUI/YardForce/include/model/Display_SAxPRO.hpp b/Firmware/CoverUI/YardForce/include/model/Display_SAxPRO.hpp index 9241945..5f61ede 100644 --- a/Firmware/CoverUI/YardForce/include/model/Display_SAxPRO.hpp +++ b/Firmware/CoverUI/YardForce/include/model/Display_SAxPRO.hpp @@ -31,6 +31,13 @@ #define BACKLIGHT_TIMEOUT_MS 120000 // 2 minutes #define STATUS_TICKER_LENGTH 100 +// Enable for benchmarking specific code +// #define BENCHMARK + +#ifdef BENCHMARK +#include "include/CortexMCycleCounter.hpp" +#endif + namespace yardforce { namespace display @@ -57,6 +64,11 @@ namespace yardforce static void set_undocked(); }; + +#ifdef BENCHMARK + static CortexMCycleCounter cycle_cnt_flush_cb_; +#endif + } // namespace display } // namespace yardforce