Opsmas 2025 Day 7: kvidxkit and (&) m

Opsmas 2025 Day 7: (kv)(idx)(kit) & (m)

TOC:

kvidxkit

I like writing specific interfaces on top of more general systems. Encapsulation is king. Especially when it involves things like pre-validating or pre-restricting data input/output formats, setting up interfaces or libarires to work “the right way” transparently throughout user interfaces, and even guarding against experienced failure cases by baking in proper edge case handling behind the scenes.

Here we have kvidxkit which is a pluggable storage and access abstraction wrapper on top of sqlite, lmdb, or even rocksdb.

What good is a pluggable wrapper here?

Using the sqlite3 backend as an example, kvidxkit bakes in pre-compiled queries for the common operations with optimal usage behind the scenes. Sure, these are simple queries, but it’s easier to use repeated patterns through an encapsulated and isolated programmatic interface instead of spraying sql and error checking in three dozen places all over your app:

Then you just use a consistent interface regardless of the backend (could actually be lmdb or anything else with adapters for the interface protocol spec):

it’s not rocket surgery, but it’s simple and well-encapsulated and covers up edge cases, performance optimizations, consistent error checking and error reporting all behind the scenes automatically (instead of trying to remember to do each of those consistently every place you use the underlying DB apis in your apps directly).

kvidxkit also includes my weird take on a sql-generator for C including structured table definitions with auto-generated SQL and even auto-genearted queries where you define your table structure logically then it can be created automatically at runtime:

rando exampos:

User Management System

Generated SQL:

E-Commerce Order System

/* Products table */
static const kvidxColDef productCols[] = {
    COL("id", KVIDX_COL_PK_AUTO),
    COL("sku", KVIDX_COL_TEXT | KVIDX_COL_NOT_NULL | KVIDX_COL_UNIQUE),
    COL("name", KVIDX_COL_TEXT | KVIDX_COL_NOT_NULL),
    COL("description", KVIDX_COL_TEXT),
    COL("price", KVIDX_COL_REAL | KVIDX_COL_NOT_NULL),
    COL_DEFAULT_INT("stock", KVIDX_COL_INTEGER | KVIDX_COL_NOT_NULL, 0),
    COL_DEFAULT_INT("active", KVIDX_COL_INTEGER | KVIDX_COL_NOT_NULL, 1),
};

/* Orders table */
static const kvidxColDef orderCols[] = {
    COL("id", KVIDX_COL_PK_AUTO),
    COL_FK("user_id", KVIDX_COL_FK | KVIDX_COL_NOT_NULL, "users"),
    COL_DEFAULT_TEXT("status", KVIDX_COL_TEXT | KVIDX_COL_NOT_NULL, "pending"),
    COL("total", KVIDX_COL_REAL | KVIDX_COL_NOT_NULL),
    COL_DEFAULT_EXPR("ordered_at", KVIDX_COL_TEXT | KVIDX_COL_NOT_NULL, "CURRENT_TIMESTAMP"),
    COL_DEFAULT_NULL("shipped_at", KVIDX_COL_TEXT),
    COL_DEFAULT_NULL("delivered_at", KVIDX_COL_TEXT),
};

static const kvidxIndexDef orderIndexes[] = {
    INDEX("user_id"),
    INDEX("status"),
    INDEX("ordered_at"),
    INDEX("user_id", "status"),
};

/* Order items (line items) */
static const kvidxColDef orderItemCols[] = {
    COL("id", KVIDX_COL_PK_AUTO),
    COL_FK("order_id", KVIDX_COL_FK_CASCADE | KVIDX_COL_NOT_NULL, "orders"),
    COL_FK("product_id", KVIDX_COL_FK | KVIDX_COL_NOT_NULL, "products"),
    COL("quantity", KVIDX_COL_INTEGER | KVIDX_COL_NOT_NULL),
    COL("unit_price", KVIDX_COL_REAL | KVIDX_COL_NOT_NULL),
};

static const kvidxIndexDef orderItemIndexes[] = {
    INDEX("order_id"),
    INDEX("product_id"),
    INDEX_UNIQUE("order_id", "product_id"),  /* One line item per product per order */
};

Multi-Tenant SaaS Application

/* Tenants */
static const kvidxColDef tenantCols[] = {
    COL("id", KVIDX_COL_PK_AUTO),
    COL("name", KVIDX_COL_TEXT | KVIDX_COL_NOT_NULL),
    COL("slug", KVIDX_COL_TEXT | KVIDX_COL_NOT_NULL | KVIDX_COL_UNIQUE),
    COL_DEFAULT_TEXT("plan", KVIDX_COL_TEXT | KVIDX_COL_NOT_NULL, "free"),
    COL_DEFAULT_INT("active", KVIDX_COL_INTEGER | KVIDX_COL_NOT_NULL, 1),
    COL_DEFAULT_EXPR("created_at", KVIDX_COL_TEXT, "CURRENT_TIMESTAMP"),
};

/* Tenant users (with deferred FK for circular references) */
static const kvidxColDef tenantUserCols[] = {
    COL("id", KVIDX_COL_PK_AUTO),
    COL_FK("tenant_id", KVIDX_COL_FK_CASCADE | KVIDX_COL_NOT_NULL, "tenants"),
    COL("email", KVIDX_COL_TEXT | KVIDX_COL_NOT_NULL),
    COL_DEFAULT_TEXT("role", KVIDX_COL_TEXT | KVIDX_COL_NOT_NULL, "member"),
    COL_DEFAULT_INT("active", KVIDX_COL_INTEGER | KVIDX_COL_NOT_NULL, 1),
};

static const kvidxIndexDef tenantUserIndexes[] = {
    INDEX("tenant_id"),
    INDEX_UNIQUE("tenant_id", "email"),  /* Email unique per tenant */
    INDEX("tenant_id", "role"),
};

/* Tenant resources */
static const kvidxColDef resourceCols[] = {
    COL("id", KVIDX_COL_PK_AUTO),
    COL_FK("tenant_id", KVIDX_COL_FK_CASCADE | KVIDX_COL_NOT_NULL, "tenants"),
    COL("type", KVIDX_COL_TEXT | KVIDX_COL_NOT_NULL),
    COL("name", KVIDX_COL_TEXT | KVIDX_COL_NOT_NULL),
    COL("data", KVIDX_COL_BLOB),
    COL_DEFAULT_EXPR("created_at", KVIDX_COL_TEXT, "CURRENT_TIMESTAMP"),
    COL_DEFAULT_EXPR("updated_at", KVIDX_COL_TEXT, "CURRENT_TIMESTAMP"),
};

static const kvidxIndexDef resourceIndexes[] = {
    INDEX("tenant_id"),
    INDEX("tenant_id", "type"),
    INDEX_UNIQUE("tenant_id", "type", "name"),  /* Unique name per type per tenant */
};

More Code Example

#include "kvidxkitTableDesc.h"
#include "kvidxkitSchema.h"
#include <sqlite3.h>
#include <stdio.h>

/* Define all columns */
static const kvidxColDef userCols[] = {
    COL("id", KVIDX_COL_PK_AUTO),
    COL("username", KVIDX_COL_TEXT | KVIDX_COL_NOT_NULL | KVIDX_COL_UNIQUE),
    COL("email", KVIDX_COL_TEXT | KVIDX_COL_NOT_NULL | KVIDX_COL_UNIQUE),
    COL_DEFAULT_INT("active", KVIDX_COL_INTEGER | KVIDX_COL_NOT_NULL, 1),
    COL_DEFAULT_EXPR("created_at", KVIDX_COL_TEXT, "CURRENT_TIMESTAMP"),
};

static const kvidxColDef postCols[] = {
    COL("id", KVIDX_COL_PK_AUTO),
    COL_FK("user_id", KVIDX_COL_FK_CASCADE | KVIDX_COL_NOT_NULL, "users"),
    COL("title", KVIDX_COL_TEXT | KVIDX_COL_NOT_NULL),
    COL("body", KVIDX_COL_TEXT),
    COL_DEFAULT_INT("published", KVIDX_COL_INTEGER | KVIDX_COL_NOT_NULL, 0),
    COL_DEFAULT_EXPR("created_at", KVIDX_COL_TEXT, "CURRENT_TIMESTAMP"),
};

/* Define all indexes */
static const kvidxIndexDef userIndexes[] = {
    INDEX_UNIQUE("username"),
    INDEX_UNIQUE("email"),
    INDEX("active"),
};

static const kvidxIndexDef postIndexes[] = {
    INDEX("user_id"),
    INDEX("published"),
    INDEX("user_id", "published"),
    INDEX("created_at"),
};

/* Define all tables */
static const kvidxTableDef tables[] = {
    {
        .name = "users",
        .columns = userCols,
        .colCount = sizeof(userCols) / sizeof(*userCols),
        .indexes = userIndexes,
        .indexCount = sizeof(userIndexes) / sizeof(*userIndexes),
    },
    {
        .name = "posts",
        .columns = postCols,
        .colCount = sizeof(postCols) / sizeof(*postCols),
        .indexes = postIndexes,
        .indexCount = sizeof(postIndexes) / sizeof(*postIndexes),
    },
};

int main(void) {
    sqlite3 *db;

    /* Open database */
    if (sqlite3_open("blog.db", &db) != SQLITE_OK) {
        fprintf(stderr, "Failed to open database\n");
        return 1;
    }

    /* Enable foreign keys */
    sqlite3_exec(db, "PRAGMA foreign_keys = ON;", NULL, NULL, NULL);

    /* Create all tables */
    kvidxError err = kvidxSchemaCreateTables(db, tables,
        sizeof(tables) / sizeof(*tables));

    if (err != KVIDX_OK) {
        fprintf(stderr, "Failed to create tables\n");
        sqlite3_close(db);
        return 1;
    }

    /* Generate and print INSERT statement for users */
    char buf[1024];
    kvidxGenInsert(&tables[0], buf, sizeof(buf));
    printf("Users INSERT: %s\n", buf);

    /* Generate UPDATE statement */
    kvidxGenUpdateById(&tables[0], buf, sizeof(buf));
    printf("Users UPDATE: %s\n", buf);

    /* Use the generated statements with sqlite3_prepare_v2... */

    sqlite3_close(db);
    return 0;
}

WITHOUT ROWID Table

For key-value stores or tables where you don’t need SQLite’s implicit rowid.

Basic Table

Table with Indexes

tests n stuff

      Start  1: kvidxkit-basic-tests
      Start  2: kvidxkit-comprehensive-tests
      Start  3: kvidxkit-error-handling-tests
      Start  4: kvidxkit-batch-operation-tests
      Start  5: kvidxkit-iterator-tests
      Start  6: kvidxkit-statistics-tests
      Start  7: kvidxkit-configuration-tests
      Start  8: kvidxkit-range-operation-tests
 1/14 Test  #3: kvidxkit-error-handling-tests ....   Passed    0.56 sec
      Start  9: kvidxkit-export-import-tests
 2/14 Test  #4: kvidxkit-batch-operation-tests ...   Passed    1.19 sec
      Start 10: kvidxkit-tabledesc-tests
 3/14 Test  #2: kvidxkit-comprehensive-tests .....   Passed    1.66 sec
      Start 11: kvidxkit-primitives-tests
 4/14 Test  #1: kvidxkit-basic-tests .............   Passed    2.13 sec
      Start 12: kvidxkit-lmdb-adapter-tests
 5/14 Test  #5: kvidxkit-iterator-tests ..........   Passed    2.67 sec
      Start 13: kvidxkit-rocksdb-adapter-tests
 6/14 Test  #6: kvidxkit-statistics-tests ........   Passed    3.38 sec
      Start 14: kvidxkit-fuzzer-tests
 7/14 Test  #7: kvidxkit-configuration-tests .....   Passed    3.76 sec
 8/14 Test  #8: kvidxkit-range-operation-tests ...   Passed    4.42 sec
 9/14 Test  #9: kvidxkit-export-import-tests .....   Passed    4.28 sec
10/14 Test #10: kvidxkit-tabledesc-tests .........   Passed    3.81 sec
11/14 Test #12: kvidxkit-lmdb-adapter-tests ......   Passed    4.28 sec
12/14 Test #11: kvidxkit-primitives-tests ........   Passed    5.94 sec
13/14 Test #13: kvidxkit-rocksdb-adapter-tests ...   Passed    5.23 sec
14/14 Test #14: kvidxkit-fuzzer-tests ............   Passed  296.72 sec

100% tests passed, 0 tests failed out of 14

Total Test time (real) = 300.11 sec

benchy and stuff

╔══════════════════════════════════════════════════════════════════════════════╗
║                kvidxkit Performance Benchmark Framework v1.0
╠══════════════════════════════════════════════════════════════════════════════╣
Operations: 100000  |  Adapters: 3  |  Data size: 64 bytes          ║
╚══════════════════════════════════════════════════════════════════════════════╝

════════════════════════════════════════════════════════════════════════════════
  Benchmarking: SQLite3
════════════════════════════════════════════════════════════════════════════════
  [1/10] Sequential Insert...
  [2/10] Sequential Read...
  [3/10] Random Insert...
  [4/10] Random Read...
  [5/10] Mixed Workload (80/20)...
  [6/10] Batch Insert...
  [7/10] Range Count Query...
  [8/10] Iterator Scan...
  [9/10] Large Data (4KB blobs)...
  [10/10] Delete...
  Done.

════════════════════════════════════════════════════════════════════════════════
  Benchmarking: LMDB
════════════════════════════════════════════════════════════════════════════════
  [1/10] Sequential Insert...
  [2/10] Sequential Read...
  [3/10] Random Insert...
  [4/10] Random Read...
  [5/10] Mixed Workload (80/20)...
  [6/10] Batch Insert...
  [7/10] Range Count Query...
  [8/10] Iterator Scan...
  [9/10] Large Data (4KB blobs)...
  [10/10] Delete...
  Done.

════════════════════════════════════════════════════════════════════════════════
  Benchmarking: RocksDB
════════════════════════════════════════════════════════════════════════════════
  [1/10] Sequential Insert...
  [2/10] Sequential Read...
  [3/10] Random Insert...
  [4/10] Random Read...
  [5/10] Mixed Workload (80/20)...
  [6/10] Batch Insert...
  [7/10] Range Count Query...
  [8/10] Iterator Scan...
  [9/10] Large Data (4KB blobs)...
  [10/10] Delete...
  Done.

╔══════════════════════════════════════════════════════════════════════════════╗
BENCHMARK RESULTS (ops/sec)                          ║
╚══════════════════════════════════════════════════════════════════════════════╝

BenchmarkSQLite3LMDBRocksDB               
─────────────────────┼───────────────────────┼───────────────────────┼───────────────────────
Sequential Insert980.16K803.71K90.30K
Sequential Read907.19K994.53K180.38K
Random Insert433.16K550.39K47.32K
Random Read697.19K747.10K172.45K
Mixed 80/20 R/W807.41K166.52K81.01K
Batch Insert869.35K179.78K64.78K
Range Count Query3111.38K149.45K
Iterator Scan867.49K939.16K119.46K
Large Data (4KB)     │              49.31K85.62K46.48K
Delete1.18M745.06K114.94K

╔══════════════════════════════════════════════════════════════════════════════╗
THROUGHPUT RESULTS (MB/s)                           ║
╚══════════════════════════════════════════════════════════════════════════════╝

BenchmarkSQLite3LMDBRocksDB               
─────────────────────┼───────────────────────┼───────────────────────┼───────────────────────
Sequential Insert59.8249.055.51
Sequential Read55.3760.7011.01
Random Insert26.4433.592.89
Random Read42.5545.6010.53
Mixed 80/20 R/W49.2810.164.94
Batch Insert53.0610.973.95
Iterator Scan52.9557.327.29
Large Data (4KB)     │              192.64334.47181.58

╔══════════════════════════════════════════════════════════════════════════════╗
LATENCY RESULTS (microseconds)                        ║
╚══════════════════════════════════════════════════════════════════════════════╝

BenchmarkSQLite3LMDBRocksDB               
─────────────────────┼───────────────────────┼───────────────────────┼───────────────────────
Sequential Insert1.021.2411.07
Sequential Read1.101.015.54
Random Insert2.311.8221.13
Random Read1.431.345.80
Mixed 80/20 R/W1.246.0112.34
Batch Insert1.155.5615.44
Range Count Query3212.83724.786.69
Iterator Scan1.151.068.37
Large Data (4KB)     │               20.2811.6821.51
Delete0.851.348.70

╔══════════════════════════════════════════════════════════════════════════════╗
WINNER REPORT - Best Adapter by Operation
╠══════════════════════════════════════════════════════════════════════════════╣
║            (~) = within 10% of leader, considered statistical tie            ║
╚══════════════════════════════════════════════════════════════════════════════╝

OperationWinnerPerformanceRanking (slowdown factor)
─────────────────────┼──────────────┼──────────────┼──────────────────────────────────────
Sequential InsertSQLite3980.16K/s │ SQLite3 > LMDB(1.2x) > RocksDB(10.9x)
Sequential Read~TIE~994.53K/s │ LMDB ~ SQLite3 > RocksDB(5.5x)
Random InsertLMDB550.39K/s │ LMDB > SQLite3(1.3x) > RocksDB(11.6x)
Random Read~TIE~747.10K/s │ LMDB ~ SQLite3 > RocksDB(4.3x)
Mixed 80/20 R/WSQLite3807.41K/s │ SQLite3 > LMDB(4.8x) > RocksDB(10.0x)
Batch InsertSQLite3869.35K/s │ SQLite3 > LMDB(4.8x) > RocksDB(13.4x)
Range Count QueryRocksDB149.45K/s │ RocksDB > LMDB(108.3x) > SQLite3(480.2x)
Iterator Scan~TIE~939.16K/s │ LMDB ~ SQLite3 > RocksDB(7.9x)
Large Data (4KB)     │ LMDB85.62K/s │ LMDB > SQLite3(1.7x) > RocksDB(1.8x)
DeleteSQLite31.18M/s │ SQLite3 > LMDB(1.6x) > RocksDB(10.3x)

╔══════════════════════════════════════════════════════════════════════════════╗
OVERALL SCORE BY ADAPTER
╠══════════════════════════════════════════════════════════════════════════════╣
Clear wins = 1 point, Tied wins = 0.5 points each               ║
╚══════════════════════════════════════════════════════════════════════════════╝

  SQLite3    █████▌░░░░ 5.5 pts (4 clear + 3 tied) <- OVERALL WINNER
  LMDB       ███▌░░░░░░ 3.5 pts (2 clear + 3 tied)
  RocksDB    █░░░░░░░░░ 1.0 pts (1 clear)

╔══════════════════════════════════════════════════════════════════════════════╗
BENCHMARK COMPLETE
╚══════════════════════════════════════════════════════════════════════════════╝

stats

of course, this key-term-value index compliance safe-committed storage system was designed to be useful in real world scenarios where you need guaranteed trustworthy term-key-value logging which we will get to… probably tomorrow.

m

mixtures of experts are so hot right now.

so anyway, i started blasting, bang bang and i banged out a mixture of expert mixing system earlier this year

it just takes numbers from the past and tries to predict numbers from the future. no big deal. there’s pictures and stuff in the repo of it predicting/completing geometric shapes with various perturberances applied in various dimensional spaces.

i can’t imagine why anybody would care about high dimensional number predicting or anything.

stats