Commit 2ea67f3

refactor: replace merge-sort with heapsort (#405)
This change replaces the merge-sort with a heapsort that uses far fewer compute units (CU). The previous algorithm is very fast on a normal CPU but performs poorly in BPF, where all instructions (load, move, ...) have the same cost, there is no cache, and memory-alignment optimizations bring no real benefit. The major benefits of heapsort are that it is non-recursive, which avoids the high stack-frame overhead in BPF, and in-place, which minimizes the number of copies. Unfortunately there is no way to systematically extract the compute usage from a program test. The `test_benchmark` file contains simple code that helps run benchmarks with various numbers of publishers. In the 32-publisher setup, heapsort reduces the CU from 16.5k to 12k, and in the 64-publisher setup from 37k to 20.5k. These numbers are the worst cases observed on randomized input. Running on highly similar input (two distinct prices and two distinct confidences) gives 14.7k and 25k respectively, which is still better than before and is very unlikely in practice.
1 parent 9acf172 commit 2ea67f3

13 files changed: +239, -561 lines
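The `test_benchmark` code referenced in the commit message is not part of the hunks shown below. As a rough illustration only, here is a hypothetical stand-alone C driver that prepares the two input shapes described above (randomized, and "two distinct values") for the 32- and 64-publisher setups and runs the new `heapsort` on them. It does not measure compute units (that requires the Solana program-test harness), and every name other than `heapsort` is assumed.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Provided by price_model.c in this commit. */
void heapsort( int64_t * a, uint64_t n );

int
main( void ) {
  static const uint64_t setups[] = { 32UL, 64UL }; /* publisher counts from the commit message */

  for( size_t s = 0; s < sizeof(setups)/sizeof(setups[0]); s++ ) {
    uint64_t cnt = setups[s];   /* simplification: one quote per publisher */
    int64_t  quote[64];

    /* Worst-case-like shape: fully randomized input. */
    for( uint64_t i = 0; i < cnt; i++ ) quote[i] = (int64_t)rand();
    heapsort( quote, cnt );

    /* Highly similar shape: only two distinct values. */
    for( uint64_t i = 0; i < cnt; i++ ) quote[i] = (int64_t)(i & 1UL);
    heapsort( quote, cnt );

    printf( "sorted both input shapes for %lu publishers\n", (unsigned long)cnt );
  }
  return 0;
}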

program/c/makefile

Lines changed: 0 additions & 1 deletion

@@ -41,7 +41,6 @@ cpyth-native: features.h
 test: features.h
 	mkdir -p $(OUT_DIR)/test/
 	gcc -c ./src/oracle/model/test_price_model.c -o $(OUT_DIR)/test/test_price_model.o -fPIC
-	gcc -c ./src/oracle/sort/test_sort_stable.c -o $(OUT_DIR)/test/test_sort_stable.o -fPIC
 	gcc -c ./src/oracle/util/test_align.c -o $(OUT_DIR)/test/test_align.o -fPIC
 	gcc -c ./src/oracle/util/test_avg.c -o $(OUT_DIR)/test/test_avg.o -fPIC
 	gcc -c ./src/oracle/util/test_hash.c -o $(OUT_DIR)/test/test_hash.o -fPIC
program/c/src/oracle/model/price_model.c

Lines changed: 73 additions & 81 deletions
@@ -1,112 +1,104 @@
 #include "price_model.h"
 #include "../util/avg.h" /* For avg_2_int64 */
 
-#define SORT_NAME int64_sort_ascending
-#define SORT_KEY_T int64_t
-#include "../sort/tmpl/sort_stable.c"
+/*
+ * In-place bottom-up Heapsort implementation optimized for minimal compute unit usage in BPF.
+ *
+ * Initially it creates a max heap in linear time and then to get ascending
+ * order it swaps the root with the last element and then sifts down the root.
+ *
+ * The number of comparisions in average case is nlgn + O(1) and in worst case is
+ * 1.5nlgn + O(n).
+ *
+ * There are a lot of (j-1) or (j+1) math in the code which can be optimized by
+ * thinking of a as 1-based array. Fortunately, BPF compiler optimizes that for us.
+ */
+void heapsort(int64_t * a, uint64_t n) {
+  if (n <= 1) return;
+
+  /*
+   * This is a bottom-up heapify which is linear in time.
+   */
+  for (uint64_t i = n / 2 - 1;; --i) {
+    int64_t root = a[i];
+    uint64_t j = i * 2 + 1;
+    while (j < n) {
+      if (j + 1 < n && a[j] < a[j + 1]) ++j;
+      if (root >= a[j]) break;
+      a[(j - 1) / 2] = a[j];
+      j = j * 2 + 1;
+    }
+    a[(j - 1) / 2] = root;
+
+    if (i == 0) break;
+  }
+
+  for (uint64_t i = n - 1; i > 0; --i) {
+    int64_t tmp = a[0];
+    a[0] = a[i];
+    a[i] = tmp;
+
+    int64_t root = a[0];
+    uint64_t j = 1;
+    while (j < i) {
+      if (j + 1 < i && a[j] < a[j + 1]) ++j;
+      if (root >= a[j]) break;
+      a[(j - 1) / 2] = a[j];
+      j = j * 2 + 1;
+    }
+    a[(j - 1) / 2] = root;
+  }
+}
 
+/*
+ * Find the 25, 50, and 75 percentiles of the given quotes using heapsort.
+ *
+ * This implementation optimizes the price_model_core function for minimal compute unit usage in BPF.
+ *
+ * In Solana, each BPF instruction costs 1 unit of compute and is much different than a native code
+ * execution time. Here are some of the differences:
+ * 1. There is no cache, so memory access is much more expensive.
+ * 2. The instruction set is very minimal, and there are only 10 registers available.
+ * 3. The BPF compiler is not very good at optimizing the code.
+ * 4. The stack size is limited and having extra stack frame has high overhead.
+ *
+ * This implementation is chosen among other implementations such as merge-sort, quick-sort, and quick-select
+ * because it is very fast, has small number of instructions, and has a very small memory footprint by being
+ * in-place and is non-recursive and has a nlogn worst-case time complexity.
+ */
 int64_t *
 price_model_core( uint64_t  cnt,
                   int64_t * quote,
                   int64_t * _p25,
                   int64_t * _p50,
-                  int64_t * _p75,
-                  void    * scratch ) {
-
-  /* Sort the quotes.  The sorting implementation used here is a highly
-     optimized mergesort (merge with an unrolled insertion sorting
-     network small n base cases).  The best case is ~0.5 n lg n compares
-     and the average and worst cases are ~n lg n compares.
-
-     While not completely data oblivious, this has quite low variance in
-     operation count practically and this is _better_ than quicksort's
-     average case and quicksort's worst case is a computational
-     denial-of-service and timing attack vulnerable O(n^2).  Unlike
-     quicksort, this is also stable (but this stability does not
-     currently matter ... it might be a factor in future models).
-
-     A data oblivious sorting network approach might be viable here with
-     and would have a completely deterministic operations count.  It
-     currently isn't used as the best known practical approaches for
-     general n have a worse algorithmic cost (O( n (lg n)^2 )) and,
-     while the application probably doesn't need perfect obliviousness,
-     mergesort is still moderately oblivious and the application can
-     benefit from mergesort's lower operations cost.  (The main drawback
-     of mergesort over quicksort is that it isn't in place, but memory
-     footprint isn't an issue here.)
-
-     Given the operations cost model (e.g. cache friendliness is not
-     incorporated), a radix sort might be viable here (O(n) in best /
-     average / worst).  It currently isn't used as we expect invocations
-     with small-ish n to be common and radix sort would be have large
-     coefficients on the O(n) and additional fixed overheads that would
-     make it more expensive than mergesort in this regime.
-
-     Note: price_model_cnt_valid( cnt ) implies
-     int64_sort_ascending_cnt_valid( cnt ) currently.
-
-     Note: consider filtering out "NaN" quotes (i.e. INT64_MIN)? */
-
-  int64_t * sort_quote = int64_sort_ascending_stable( quote, cnt, scratch );
-
-  /* Extract the p25
-
-     There are many variants with subtle tradeoffs here.  One option is
-     to interpolate when the ideal p25 is bracketed by two samples (akin
-     to the p50 interpolation above when the number of quotes is even).
-     That is, for p25, interpolate between quotes floor((cnt-2)/4) and
-     ceil((cnt-2)/4) with the weights determined by cnt mod 4.  The
-     current preference is to not do that as it is slightly more
-     complex, doesn't exactly always minimize the current loss function
-     and is more exposed to the confidence intervals getting skewed by
-     bum quotes with the number of quotes is small.
-
-     Another option is to use the inside quote of the above pair.  That
-     is, for p25, use quote ceil((cnt-2)/4) == floor((cnt+1)/4) ==
-     (cnt+1)>>2.  The current preference is not to do this as, though
-     this has stronger bum quote robustness, it results in p25==p50==p75
-     when cnt==3.  (In this case, the above wants to do an interpolation
-     between quotes 0 and 1 to for the p25 and between quotes 1 and 2
-     for the p75.  But limiting to just the inside quote results in
-     p25/p50/p75 all using the median quote.)
-
-     A tweak to this option, for p25, is to use floor(cnt/4) == cnt>>2.
-     This is simple, has the same asymptotic behavior for large cnt, has
-     good behavior in the cnt==3 case and practically as good bum quote
-     rejection in the moderate cnt case. */
+                  int64_t * _p75) {
+  heapsort(quote, cnt);
 
+  /* Extract the p25 */
   uint64_t p25_idx = cnt >> 2;
-
-  *_p25 = sort_quote[p25_idx];
+  *_p25 = quote[p25_idx];
 
   /* Extract the p50 */
-
   if( (cnt & (uint64_t)1) ) { /* Odd number of quotes */
-
     uint64_t p50_idx = cnt >> 1; /* ==ceil((cnt-1)/2) */
-
-    *_p50 = sort_quote[p50_idx];
-
+    *_p50 = quote[p50_idx];
   } else { /* Even number of quotes (at least 2) */
-
     uint64_t p50_idx_right = cnt >> 1;                    /* == ceil((cnt-1)/2)> 0 */
     uint64_t p50_idx_left  = p50_idx_right - (uint64_t)1; /* ==floor((cnt-1)/2)>=0 (no overflow/underflow) */
 
-    int64_t vl = sort_quote[p50_idx_left ];
-    int64_t vr = sort_quote[p50_idx_right];
+    int64_t vl = quote[p50_idx_left];
+    int64_t vr = quote[p50_idx_right];
 
     /* Compute the average of vl and vr (with floor / round toward
        negative infinity rounding and without possibility of
        intermediate overflow). */
-
     *_p50 = avg_2_int64( vl, vr );
   }
 
   /* Extract the p75 (this is the mirror image of the p25 case) */
 
   uint64_t p75_idx = cnt - ((uint64_t)1) - p25_idx;
+  *_p75 = quote[p75_idx];
 
-  *_p75 = sort_quote[p75_idx];
-
-  return sort_quote;
+  return quote;
 }
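With the scratch parameter and the old `price_model` wrapper gone, the quotes are sorted in place and the caller simply passes the quote array itself. A minimal usage sketch of the new signature (the quote values below are made up for illustration):

#include <stdint.h>
#include <stdio.h>

#include "price_model.h"

int
main( void ) {
  /* Quotes are sorted in place; no scratch region is needed any more. */
  int64_t  quote[] = { 120, 100, 115, 95, 130, 105 };
  uint64_t cnt     = sizeof(quote)/sizeof(quote[0]);

  int64_t p25, p50, p75;
  int64_t * sorted = price_model_core( cnt, quote, &p25, &p50, &p75 );

  /* sorted == quote: the function returns its (now sorted) input array. */
  printf( "p25=%lld p50=%lld p75=%lld min=%lld max=%lld\n",
          (long long)p25, (long long)p50, (long long)p75,
          (long long)sorted[0], (long long)sorted[cnt-1] );
  return 0;
}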

program/c/src/oracle/model/price_model.h

Lines changed: 11 additions & 82 deletions

@@ -8,91 +8,20 @@
 extern "C" {
 #endif
 
-/* Returns the minimum and maximum number of quotes the implementation
-   can handle */
-
-static inline uint64_t
-price_model_quote_min( void ) {
-  return (uint64_t)1;
-}
-
-static inline uint64_t
-price_model_quote_max( void ) {
-  return (UINT64_MAX-(uint64_t)alignof(int64_t)+(uint64_t)1) / (uint64_t)sizeof(int64_t);
-}
-
-/* price_model_cnt_valid returns non-zero if cnt is a valid value or
-   zero if not. */
-
-static inline int
-price_model_cnt_valid( uint64_t cnt ) {
-  return price_model_quote_min()<=cnt && cnt<=price_model_quote_max();
-}
-
-/* price_model_scratch_footprint returns the number of bytes of scratch
-   space needed for an arbitrarily aligned scratch region required by
-   price_model to handle price_model_quote_min() to cnt quotes
-   inclusive. */
-
-static inline uint64_t
-price_model_scratch_footprint( uint64_t cnt ) { /* Assumes price_model_cnt_valid( cnt ) is true */
-  /* cnt int64_t's plus worst case alignment padding, no overflow
-     possible as cnt is valid at this point */
-  return cnt*(uint64_t)sizeof(int64_t)+(uint64_t)alignof(int64_t)-(uint64_t)1;
-}
-
-/* price_model_core minimizes (to quote precision in a floor / round
-   toward negative infinity sense) the loss model of the given quotes.
-   Assumes valid inputs (e.g. cnt is at least 1 and not unreasonably
-   large ... typically a multiple of 3 but this is not required,
-   quote[i] for i in [0,cnt) are the quotes of interest on input, p25,
-   p50, p75 point to where to write model outputs, scratch points to a
-   suitable footprint scratch region).
-
-   Returns a pointer to the quotes sorted in ascending order.  As such,
-   the min and max and any other rank statistic can be extracted easily
-   on return.  This location will either be quote itself or to a
-   location in scratch.  Use price_model below for a variant that always
-   replaces quote with the sorted quotes (potentially has extra ops for
-   copying).  Further, on return, *_p25, *_p50, *_p75 will hold the loss
-   model minimizing values for the input quotes and the scratch region
-   was clobbered.
-
-   Scratch points to a memory region of arbitrary alignment with at
-   least price_model_scratch_footprint( cnt ) bytes and it will be
-   clobbered on output.  It is sufficient to use a normally aligned /
-   normally allocated / normally declared array of cnt int64_t's.
-
-   The cost of this function is a fast and low variance (but not
-   completely data oblivious) O(cnt lg cnt) in the best / average /
-   worst cases.  This function uses no heap / dynamic memory allocation.
-   It is thread safe provided it passed non-conflicting quote, output
-   and scratch arrays.  It has a bounded call depth ~lg cnt <= ~64 (this
-   could reduce to O(1) by using a non-recursive sort/select
-   implementation under the hood if desired). */
-
-int64_t *                          /* Returns pointer to sorted quotes (either quote or ALIGN_UP(scratch,int64_t)) */
-price_model_core( uint64_t  cnt,   /* Assumes price_model_cnt_valid( cnt ) is true */
+/*
+ * This function computes the p25, p50 and p75 percentiles of the given quotes and
+ * writes them to the given pointers. It also returns the sorted quotes array. Being
+ * sorted is not necessary for this model to work, and is only relied upon by the
+ * tests to verify the correctness of the model with more confidence.
+ *
+ * The quote array might get modified by this function.
+ */
+int64_t *
+price_model_core( uint64_t  cnt,   /* Number of elements in quote */
                   int64_t * quote, /* Assumes quote[i] for i in [0,cnt) is the i-th quote on input */
                   int64_t * _p25,  /* Assumes *_p25 is safe to write to the p25 model output */
                   int64_t * _p50,  /* Assumes *_p50 " */
-                  int64_t * _p75,  /* Assumes *_p75 " */
-                  void    * scratch ); /* Assumes a suitable scratch region */
-
-/* Same as the above but always returns quote and quote always holds the
-   sorted quotes on return. */
-
-static inline int64_t *
-price_model( uint64_t  cnt,
-             int64_t * quote,
-             int64_t * _p25,
-             int64_t * _p50,
-             int64_t * _p75,
-             void    * scratch ) {
-  int64_t * tmp = price_model_core( cnt, quote, _p25, _p50, _p75, scratch );
-  if( tmp!=quote ) for( uint64_t idx=(uint64_t)0; idx<cnt; idx++ ) quote[ idx ] = tmp[ idx ];
-  return quote;
-}
+                  int64_t * _p75); /* Assumes *_p75 " */
 
 #ifdef __cplusplus
 }

program/c/src/oracle/model/test_price_model.c

Lines changed: 29 additions & 3 deletions

@@ -19,13 +19,38 @@ int test_price_model() {
   prng_t _prng[1];
   prng_t * prng = prng_join( prng_new( _prng, (uint32_t)0, (uint64_t)0 ) );
 
-# define N 96
+# define N 192
 
   int64_t quote0 [N];
   int64_t quote  [N];
   int64_t val    [3];
-  int64_t scratch[N];
 
+  /* Brute force validate small sizes via the 0-1 principle. */
+  for( int cnt=0; cnt<=24; cnt++ ) {
+    for( long mask=0L; mask<(1L<<cnt); mask++ ) {
+      for( int i=0; i<cnt; i++ ) quote0[i] = (int64_t) ((mask>>i) & 1L);
+
+      memcpy( quote, quote0, sizeof(int64_t)*(size_t)cnt );
+      if( price_model_core( cnt, quote, val+0, val+1, val+2)!=quote ) { printf( "FAIL (01-compose)\n" ); return 1; }
+
+      /* Validate the results */
+
+      /* Although being sorted is not necessary it gives us more confidence about the correctness of the model */
+      qsort( quote0, (size_t)cnt, sizeof(int64_t), qcmp );
+      if( memcmp( quote, quote0, sizeof(int64_t)*(size_t)cnt ) ) { printf( "FAIL (01-sort)\n" ); return 1; }
+
+      uint64_t p25_idx = cnt>>2;
+      uint64_t p50_idx = cnt>>1;
+      uint64_t p75_idx = cnt - (uint64_t)1 - p25_idx;
+      uint64_t is_even = (uint64_t)!(cnt & (uint64_t)1);
+
+      if( val[0]!=quote[ p25_idx ] ) { printf( "FAIL (01-p25)\n" ); return 1; }
+      if( val[1]!=avg_2_int64( quote[ p50_idx-is_even ], quote[ p50_idx ] ) ) { printf( "FAIL (01-p50)\n" ); return 1; }
+      if( val[2]!=quote[ p75_idx ] ) { printf( "FAIL (01-p75)\n" ); return 1; }
+    }
+  }
+
+  /* Test using randomized inputs */
   for( int iter=0; iter<10000000; iter++ ) {
 
     /* Generate a random test */

@@ -36,10 +61,11 @@ int test_price_model() {
     /* Apply the model */
 
     memcpy( quote, quote0, sizeof(int64_t)*(size_t)cnt );
-    if( price_model( cnt, quote, val+0, val+1, val+2, scratch )!=quote ) { printf( "FAIL (compose)\n" ); return 1; }
+    if( price_model_core( cnt, quote, val+0, val+1, val+2)!=quote ) { printf( "FAIL (compose)\n" ); return 1; }
 
     /* Validate the results */
 
+    /* Although being sorted is not necessary it gives us more confidence about the correctness of the model */
     qsort( quote0, (size_t)cnt, sizeof(int64_t), qcmp );
     if( memcmp( quote, quote0, sizeof(int64_t)*(size_t)cnt ) ) { printf( "FAIL (sort)\n" ); return 1; }
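The test checks the in-place sort against `qsort`; the `qcmp` comparator it passes is defined elsewhere in test_price_model.c and is not shown in these hunks. A sketch of an ascending `int64_t` comparator of the kind assumed here (not the committed definition):

#include <stdint.h>

/* Ascending int64_t comparator for qsort. Written with explicit compares
   rather than "return (int)(a-b)", which can overflow and truncate. */
static int
qcmp( void const * _a,
      void const * _b ) {
  int64_t a = *(int64_t const *)_a;
  int64_t b = *(int64_t const *)_b;
  if( a < b ) return -1;
  if( a > b ) return  1;
  return 0;
}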

0 commit comments