Serene Runtime 1.0.0
C runtime for the Serene programming language
Loading...
Searching...
No Matches
seq.h
Go to the documentation of this file.
1/* -*- C -*-
2 * Serene programming language
3 * Copyright (C) 2019-2026 Sameer Rahmani <[email protected]>
4 *
5 * This library is free software: you can redistribute it and/or modify
6 * it under the terms of the GNU Lesser General Public License as published by
7 * the Free Software Foundation, either version 3 of the License, or
8 * (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU Lesser General Public License for more details.
14 *
15 * You should have received a copy of the GNU Lesser General Public License
16 * along with this library. If not, see <https://www.gnu.org/licenses/>.
17 */
18
19#pragma once
20
21/// @file
22/// This is an implementation of bit - partitioned, persistent, immutable
23/// sequence For more information, have a look at `Ideal hash trees` paper.
24///
25/// TL;DR:
26/// - Every struct is immutable (from user perspective).
27/// - Nodes are persistent and immutable.
28/// - Nodes are heap allocated
29/// - Heads can be stack allocated
30/// - We create a new head for any change
31/// - Heads contain a tail buffer that will be converted to a leaf node
32/// when they are full
33/// - This data structure is NOT effecient for extracting sub sequences
34/// - This data structure is effecient for tail operations,
35/// - If you need a subset of the elments in the sequence, create a list
36/// view out of them instead of extracting them as a new seq.
37/// - Deleting operation is O(N)
38///
39/// TODO:
40/// - Create map and filter function or a generic fold
41/// - Create a helper function to help creating list view from the seq
42
43// clang-format off
44/* AI Generated (🀦)
45 ------------------------------------------------------------------------------
46 Example (readable scale): B = 4, SHIFT = 2
47 -------------------------------------------------------------------------------
48 seq state:
49 len = 11
50 tail_len = 3 // last 3 elements live in tail
51 depth = 1 // one internal level above leaves
52 tree = elements [0..7] // first 8 elements in the tree
53 tail = elements [8, 9, 10]
54
55 High-level view
56 β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€ Tree (0..7) ───────────────────────┐
57 β”‚ seq_t β”‚ β”‚ β”‚
58 β”‚ len = 11 β”‚ β”‚ (depth = 1, B = 4, SHIFT = 2) β”‚
59 β”‚ tail_len=3 β”‚ β”‚ β”‚
60 β”‚ depth = 1 β”‚ β”‚ Root β”‚
61 β”‚ root ──────┼──────────►│ child[0] ─┐ child[1] ─┐ child[2] child[3] β”‚
62 β”‚ tail ──┐ β”‚ β”‚ β–Ό β–Ό (NULL) (NULL) β”‚
63 β””β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”˜ β”‚ Node0 Node1 β”‚
64 β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β” β”‚
65 β–Ό β”‚ β”‚c[0] β”‚ β”‚c[0] β”‚ β”‚
66 Tail (8..10) β”‚ β”‚c[1] β”‚ β”‚c[1] β”‚ β”‚
67 [ 8 | 9 | 10 ] β”‚ β”‚c[2] β”‚ β”‚c[2]──► Leaf(4..7) β”‚
68 β”‚ β”‚c[3] β”‚ β”‚c[3] β”‚ β”‚
69 β”‚ β””β”€β”€β”¬β”€β”€β”€β”€β”˜ β””β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚
70 β”‚ β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”
71 β”‚ └────────────► Leaf(0..3) β”‚4 5 6 7β”‚
72 β”‚ β”‚^ β”‚
73 β”‚ Leaves hold up to B elements β””β”€β”€β”€β”€β”€β”€β”€β”˜
74 β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜
75
76 Concrete leaf contents
77 Leaf(0..3): [0, 1, 2, 3]
78 Leaf(4..7): [4, 5, 6, 7]
79 Tail(8..10): [8, 9, 10]
80
81 Bit-partitioned indexing (tree part)
82 For an index n in [0..7], with SHIFT=2:
83 hi = (n >> 2) & 0b11 // selects Root child[hi]
84 lo = n & 0b11 // selects leaf slot within that child’s leaf
85
86 Example: n = 6 (binary 0b0110)
87 hi = 0b01 = 1 β†’ Root.child[1] β†’ Node1
88 lo = 0b10 = 2 β†’ Node1.child[2] β†’ Leaf(4..7)[2] = 6
89
90 Tail indexing
91 For n in [8..10], use tail[n - (len - tail_len)]:
92 n = 9 β†’ tail[9 - (11 - 3)] = tail[1] = 9
93
94 Notes
95 β€’ Only the right spine and tail change on push.
96 β€’ When tail_len hits B (here 4), the tail is promoted to a new leaf and
97 spliced into the tree (cloning nodes along the modified path).
98──────────────────────────────────────────────────────────────────────────────*/
99// clang-format on
100
101#include <stddef.h>
102#include <stdint.h>
103
104#include "serene/rt/errors.h"
105
106typedef struct srn_context_t srn_context_t;
107
108// -----------------------------------------------------------------------------
109// Tunables
110// -----------------------------------------------------------------------------
111/// branching factor (power of two)
112#define SEQ_BR 32u
113
114/// log2(SEQ_BR)
115#define SEQ_SHIFT 5u
116#define SEQ_MASK (SEQ_BR - 1u)
117
118/// logical length. While techically this implementation will support up to
119/// 2^85 elements in each seq, but we will limit it down to
120/// (2^64-1)(UINT64_MAX)(on 64bit machines) in order to keep the `seq_t` as
121/// small as possible. 12 is the magic number that we use to limit the depth
122/// to. 32^11 = 1,152,921,504,606,846,976 elements
123#define SEQ_MAX_DEPTH 11
124
125/// We use generic pointers to refer to internal nodes, leaf nodes and even
126/// elements. It makes the calculation cruical to determining what type of
127/// data we are looking at, in each node. `seq_elem_t` will be pointing to
128/// actual user data when the node is a leaf node (depth == 0) and it will be
129/// pointing to the next node in the trie if the node is an inner node (depth
130/// != 0).
131typedef void *seq_elem_t;
132
137
138// -----------------------------------------------------------------------------
139// Node
140// -----------------------------------------------------------------------------
141/// We have two type of node that both are implemented using the same data
142/// structure. Inner nodes that point to other inner nodes or leaf nodes, and
143/// leaf nodes which points to actual elements of the sequence.
144///
145/// The main factor in determining the nature of the node is the depth of the
146/// trie. Depth zero, means a leaf node and an inner node otherwise.
147typedef struct seq_node_t {
148 /// We allocate `children` to be a buffer of SEQ_BR number of pointers
151
152// -----------------------------------------------------------------------------
153// Seq
154// -----------------------------------------------------------------------------
155typedef struct seq_t {
157 /// logical length. While techically this implementation will support up to
158 /// 2^85 elements in each seq, but we will limit it down to
159 /// (2^64-1)(UINT64_MAX)(on 64bit machines) in order to keep the `seq_t` as
160 /// small as possible
161 size_t len;
162 /// 0..SEQ_BR
163 uint16_t tail_len;
164 /// tree depth in levels (0 == leaf level)
165 uint8_t depth;
166 /// NULL means β€œall data is in tail”
168 /// small tail array for fast push/pop. We allocate this in heap with SEQ_BR
169 /// size and move it later to the inner Nodes.
172
173// -----------------------------------------------------------------------------
174// Public API
175// -----------------------------------------------------------------------------
176[[nodiscard]] [[gnu::nonnull(1)]] seq_t seq_empty(const srn_context_t *ctx);
177[[nodiscard]] [[gnu::nonnull(1, 2)]] seq_t
178seq_push(const srn_context_t *ctx, const seq_t *seq, seq_elem_t x);
179
180[[nodiscard]] [[gnu::nonnull(1, 2)]] seq_lookup_result_t
181seq_get(const srn_context_t *ctx, const seq_t *seq, size_t n);
int n
Definition acutest.h:538
seq_t seq_push(const srn_context_t *ctx, const seq_t *seq, seq_elem_t x)
Definition seq.c:87
seq_t seq_empty(const srn_context_t *ctx)
Definition seq.c:76
seq_lookup_result_t seq_get(const srn_context_t *ctx, const seq_t *seq, size_t n)
Negative index is not supported.
Definition seq.c:173
void * seq_elem_t
We use generic pointers to refer to internal nodes, leaf nodes and even elements.
Definition seq.h:131
seq_elem_t data
Definition seq.h:135
We have two type of node that both are implemented using the same data structure.
Definition seq.h:147
seq_elem_t * children
We allocate children to be a buffer of SEQ_BR number of pointers.
Definition seq.h:149
Definition seq.h:155
size_t len
logical length.
Definition seq.h:161
seq_node_t * root
NULL means β€œall data is in tail”
Definition seq.h:167
seq_elem_t * tail
small tail array for fast push/pop.
Definition seq.h:170
ERROR_HEADER
Definition seq.h:156
uint8_t depth
tree depth in levels (0 == leaf level)
Definition seq.h:165
uint16_t tail_len
0..SEQ_BR
Definition seq.h:163