Berry animation speed optimization for gradient (#24103)

2025-11-11 19:12:14 +01:00 · 2025-11-11 19:12:14 +01:00 · e7bc18c148
commit e7bc18c148
parent 2f70c0b99b
6 changed files with 776 additions and 733 deletions
--- a/lib/libesp32/berry_animation/src/animations/palette_pattern.be
+++ b/lib/libesp32/berry_animation/src/animations/palette_pattern.be
@ -117,15 +117,44 @@ class PalettePatternAnimation : animation.animation
    
    # Apply colors from the color source to each pixel based on its value
    var strip_length = self.engine.strip_length
-    var i = 0
-    while (i < strip_length)
-      var byte_value = self.value_buffer[i]
-      
-      # Use the color_source to get color for the byte value (0-255)
-      var color = color_source.get_color_for_value(byte_value, elapsed)
-      
-      frame.set_pixel_color(i, color)
-      i += 1
+
+    # Optimization for LUT patterns
+    var lut
+    if isinstance(color_source, animation.color_provider) && (lut := color_source.get_lut()) != nil
+      var lut_factor = color_source.LUT_FACTOR    # default = 1, we have only 128 cached values
+      var lut_max = 256 >> lut_factor
+      var i = 0
+      var frame_ptr = frame.pixels._buffer()
+      var lut_ptr = lut._buffer()
+      var buffer = self.value_buffer._buffer()
+      while (i < strip_length)
+        var byte_value = buffer[i]
+        var lut_index = byte_value >> lut_factor  # Divide by 2^lut_factor using bit shift
+        if byte_value == 255
+          lut_index = lut_max
+        end
+
+        var lut_color_ptr = lut_ptr + (lut_index << 2)  # calculate the pointer for LUT color
+        frame_ptr[0] = lut_color_ptr[0]
+        frame_ptr[1] = lut_color_ptr[1]
+        frame_ptr[2] = lut_color_ptr[2]
+        frame_ptr[3] = lut_color_ptr[3]
+
+        # advance to next
+        i += 1
+        frame_ptr += 4
+      end
+    else    # no LUT, do one color at a time
+      var i = 0
+      while (i < strip_length)
+        var byte_value = self.value_buffer[i]
+        
+        # Use the color_source to get color for the byte value (0-255)
+        var color = color_source.get_color_for_value(byte_value, elapsed)
+        
+        frame.set_pixel_color(i, color)
+        i += 1
+      end
    end
    
    return true
@ -238,13 +267,26 @@ class PaletteGradientAnimation : PalettePatternAnimation
    
    # Calculate values for each pixel
    var i = 0
+    # Calculate position within the spatial period, including temporal and phase offsets
+    var spatial_pos = (temporal_offset + phase_offset) % effective_spatial_period
+
+    # Calculate the gradient-value increment per pixel, in 1/1024ths of a value unit
+    # We calculate 1024*255/effective_spatial_period
+    # But for rounding we actually calculate
+    # ((1024 * 255 * 2) + 1) / (2 * effective_spatial_period)
+    # Note: (1024 * 255 * 2) + 1 = 522241
+    var incr_1024 = (522241 / effective_spatial_period) >> 1
+
+    # 'spatial_1024' is our gradient-value accumulator in 1/1024ths of a value unit (fixed point, 2^10)
+    var spatial_1024 = spatial_pos * incr_1024
+    var buffer = self.value_buffer._buffer()    # 'buffer' is of type 'comptr'
+
+    # var effective_spatial_period_1 = effective_spatial_period - 1
+    # # Calculate the increment in 1/256 of values
+    # var increment = tasmota.scale_uint(effective_spatial_period)
    while i < strip_length
-      # Calculate position within the spatial period, including temporal and phase offsets
-      var spatial_pos = (i + temporal_offset + phase_offset) % effective_spatial_period
-      
-      # Map spatial position to gradient value (0-255)
-      var byte_value = tasmota.scale_uint(int(spatial_pos), 0, effective_spatial_period - 1, 0, 255)
-      self.value_buffer[i] = byte_value
+      buffer[i] = spatial_1024 >> 10
+      spatial_1024 += incr_1024     # we don't really care about overflow since we clamp modulo 255 anyway
      i += 1
    end
  end
--- a/lib/libesp32/berry_animation/src/core/animation_engine.be
+++ b/lib/libesp32/berry_animation/src/core/animation_engine.be
@ -223,15 +223,17 @@ class AnimationEngine
    self.ts_end = tasmota.millis()
    self._record_tick_metrics(current_time)
    
+    global.debug_animation = false
    return true
  end
  
  # Unified update and render process
  def _update_and_render(time_ms)
+    self.ts_1 = tasmota.millis()
    # Update root animation (which updates all children)
    self.root_animation.update(time_ms)
    
-    self.ts_1 = tasmota.millis()
+    self.ts_2 = tasmota.millis()
    # Skip rendering if no children
    if self.root_animation.is_empty()
      if self.render_needed
@ -244,7 +246,7 @@ class AnimationEngine
    # Clear main buffer
    self.frame_buffer.clear()
    
-    self.ts_2 = tasmota.millis()
+    # self.ts_2 = tasmota.millis()
    # Render root animation (which renders all children with blending)
    var rendered = self.root_animation.render(self.frame_buffer, time_ms)
    
@ -421,40 +423,25 @@ class AnimationEngine
      return
    end
    
-    # Calculate statistics
-    var expected_ticks = period_ms / 5  # Expected ticks at 5ms intervals
-    var missed_ticks = expected_ticks - self.tick_count
+    # # Calculate statistics
+    # var expected_ticks = period_ms / 5  # Expected ticks at 5ms intervals
+    # var missed_ticks = expected_ticks - self.tick_count
    
    # Calculate means from sums
    var mean_time = self.tick_time_sum / self.tick_count
    var mean_anim = self.anim_time_sum / self.tick_count
    var mean_hw = self.hw_time_sum / self.tick_count
+
+      var mean_phase1 = self.phase1_time_sum / self.tick_count
+      var mean_phase2 = self.phase2_time_sum / self.tick_count
+      var mean_phase3 = self.phase3_time_sum / self.tick_count
    
-    # Calculate CPU usage percentage
-    var cpu_percent = (self.tick_time_sum * 100) / period_ms
+    # # Calculate CPU usage percentage
+    # var cpu_percent = (self.tick_time_sum * 100) / period_ms
    
    # Format and log stats - split into animation calc vs hardware output
-    var stats_msg = f"AnimEngine: ticks={self.tick_count}/{int(expected_ticks)} missed={int(missed_ticks)} total={mean_time:.2f}ms({self.tick_time_min}-{self.tick_time_max}) anim={mean_anim:.2f}ms({self.anim_time_min}-{self.anim_time_max}) hw={mean_hw:.2f}ms({self.hw_time_min}-{self.hw_time_max}) cpu={cpu_percent:.1f}%"
+    var stats_msg = f"AnimEngine: ticks={self.tick_count} total={mean_time:.2f}ms({self.tick_time_min}-{self.tick_time_max}) events={mean_phase1:.2f}ms({self.phase1_time_min}-{self.phase1_time_max}) update={mean_phase2:.2f}ms({self.phase2_time_min}-{self.phase2_time_max}) anim={mean_anim:.2f}ms({self.anim_time_min}-{self.anim_time_max}) hw={mean_hw:.2f}ms({self.hw_time_min}-{self.hw_time_max})"
    tasmota.log(stats_msg, 3)  # Log level 3 (DEBUG)
-    
-    # Print intermediate phase metrics if available
-    if self.phase1_time_sum > 0
-      var mean_phase1 = self.phase1_time_sum / self.tick_count
-      var phase1_msg = f"  Phase1(checks): mean={mean_phase1:.2f}ms({self.phase1_time_min}-{self.phase1_time_max})"
-      tasmota.log(phase1_msg, 3)
-    end
-    
-    if self.phase2_time_sum > 0
-      var mean_phase2 = self.phase2_time_sum / self.tick_count
-      var phase2_msg = f"  Phase2(events): mean={mean_phase2:.2f}ms({self.phase2_time_min}-{self.phase2_time_max})"
-      tasmota.log(phase2_msg, 3)
-    end
-    
-    if self.phase3_time_sum > 0
-      var mean_phase3 = self.phase3_time_sum / self.tick_count
-      var phase3_msg = f"  Phase3(anim): mean={mean_phase3:.2f}ms({self.phase3_time_min}-{self.phase3_time_max})"
-      tasmota.log(phase3_msg, 3)
-    end
  end
  
  # Interrupt current animations
--- a/lib/libesp32/berry_animation/src/core/parameterized_object.be
+++ b/lib/libesp32/berry_animation/src/core/parameterized_object.be
@ -103,6 +103,9 @@ class ParameterizedObject
  # @param name: string - Parameter name being accessed
  # @return any - Resolved parameter value (ValueProvider resolved to actual value)
  def member(name)
+    # if global.debug_animation
+    #   log(f">>> member {name=}", 3)
+    # end
    # Check if it's a parameter (either set in values or defined in PARAMS)
    # Implement a fast-track if the value exists
    if self.values.contains(name)
--- a/lib/libesp32/berry_animation/src/providers/color_provider.be
+++ b/lib/libesp32/berry_animation/src/providers/color_provider.be
@ -17,8 +17,9 @@ class ColorProvider : animation.value_provider
  # LUT (Lookup Table) management for color providers
  # Subclasses can use this to cache pre-computed colors for performance
  # If a subclass doesn't use a LUT, this remains nil
-  var _color_lut       # Color lookup table cache (bytes() object or nil)
-  var _lut_dirty       # Flag indicating LUT needs rebuilding
+  var _color_lut            # Color lookup table cache (bytes() object or nil)
+  var _lut_dirty            # Flag indicating LUT needs rebuilding
+  static var LUT_FACTOR = 1 # Reduction factor for LUT compression
  
  # Parameter definitions
  static var PARAMS = animation.enc_params({
--- a/lib/libesp32/berry_animation/src/providers/rich_palette_color_provider.be
+++ b/lib/libesp32/berry_animation/src/providers/rich_palette_color_provider.be
@ -377,19 +377,21 @@ class RichPaletteColorProvider : animation.color_provider
    end
    
    # Pre-compute colors for values 0, 2, 4, ..., 254 at max brightness
+    var lut_factor = self.LUT_FACTOR    # multiplier
    var i = 0
-    while i < 128
-      var value = i * 2
+    var i_max = (256 >> lut_factor)
+    while i < i_max
+      var value = i << lut_factor
      var color = self._get_color_for_value_uncached(value, 0)
      
      # Store color using efficient bytes.set()
-      self._color_lut.set(i * 4, color, 4)
+      self._color_lut.set(i << 2, color, 4)
      i += 1
    end
    
    # Add final entry for value 255 at max brightness
    var color_255 = self._get_color_for_value_uncached(255, 0)
-    self._color_lut.set(128 * 4, color_255, 4)
+    self._color_lut.set(i_max << 2, color_255, 4)
    
    self._lut_dirty = false
  end
@ -459,7 +461,7 @@ class RichPaletteColorProvider : animation.color_provider
    # Map value to LUT index
    # For values 0-254: index = value / 2 (integer division)
    # For value 255: index = 128
-    var lut_index = value >> 1  # Divide by 2 using bit shift
+    var lut_index = value >> self.LUT_FACTOR  # Divide by 2^LUT_FACTOR using bit shift
    if value >= 255
      lut_index = 128
    end
--- a/lib/libesp32/berry_animation/src/solidify/solidified_animation.h
+++ b/lib/libesp32/berry_animation/src/solidify/solidified_animation.h