248 lines
7.9 KiB
C
248 lines
7.9 KiB
C
/*
|
|
* Copyright (C) 2021 Icecream95
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a
|
|
* copy of this software and associated documentation files (the "Software"),
|
|
* to deal in the Software without restriction, including without limitation
|
|
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
|
* and/or sell copies of the Software, and to permit persons to whom the
|
|
* Software is furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice (including the next
|
|
* paragraph) shall be included in all copies or substantial portions of the
|
|
* Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
* SOFTWARE.
|
|
*/
|
|
|
|
/* A nodearray is an array type that is either sparse or dense, depending on
|
|
* the number of elements.
|
|
*
|
|
* When the number of elements is over a threshold (max_sparse), the dense mode
|
|
* is used, and the nodearray is simply a container for an array.
|
|
*
|
|
* In sparse mode, the array has elements with a 24-bit node index and a value.
|
|
* The nodes are always sorted, so that a binary search can be used to find
|
|
* elements. Nonexistent elements are treated as zero.
|
|
*
|
|
* Function names follow ARM instruction names: orr does *elem |= value.
|
|
*
|
|
* Although it's probably already fast enough, the datastructure could be sped
|
|
* up a lot, especially when NEON is available, by making the sparse mode store
|
|
* sixteen adjacent values, so that adding new keys also allocates nearby keys,
|
|
* and to allow for vectorising iteration, as can be done when in the dense
|
|
* mode.
|
|
*/
|
|
|
|
#ifndef __BIFROST_NODEARRAY_H
|
|
#define __BIFROST_NODEARRAY_H
|
|
|
|
#include <stdint.h>
|
|
|
|
#ifdef __cplusplus
|
|
extern "C" {
|
|
#endif
|
|
|
|
/* A value that may be stored in a nodearray element, used directly for dense
|
|
* elements and included into sparse elements.
|
|
*/
|
|
typedef uint16_t nodearray_value;
|
|
|
|
#define NODEARRAY_MAX_VALUE 0xffff
|
|
|
|
/* Type storing sparse nodearray elements, consisting of a nodearray_value at
|
|
* the bottom and a nodearray_key at the top.
|
|
*/
|
|
typedef uint64_t nodearray_sparse;
|
|
|
|
typedef struct {
|
|
union {
|
|
nodearray_sparse *sparse;
|
|
nodearray_value *dense;
|
|
};
|
|
unsigned size;
|
|
unsigned sparse_capacity;
|
|
} nodearray;
|
|
|
|
/* Align sizes to 16-bytes for SIMD purposes */
|
|
#define NODEARRAY_DENSE_ALIGN(x) ALIGN_POT(x, 16)
|
|
|
|
#define nodearray_sparse_foreach(buf, elem) \
|
|
for (nodearray_sparse *elem = (buf)->sparse; \
|
|
elem < (buf)->sparse + (buf)->size; elem++)
|
|
|
|
#define nodearray_dense_foreach(buf, elem) \
|
|
for (nodearray_value *elem = (buf)->dense; \
|
|
elem < (buf)->dense + (buf)->size; elem++)
|
|
|
|
#define nodearray_dense_foreach_64(buf, elem) \
|
|
for (uint64_t *elem = (uint64_t *)(buf)->dense; \
|
|
(nodearray_value *)elem < (buf)->dense + (buf)->size; elem++)
|
|
|
|
static inline bool
|
|
nodearray_is_sparse(const nodearray *a)
|
|
{
|
|
return a->sparse_capacity != ~0U;
|
|
}
|
|
|
|
static inline void
|
|
nodearray_init(nodearray *a)
|
|
{
|
|
memset(a, 0, sizeof(nodearray));
|
|
}
|
|
|
|
static inline void
|
|
nodearray_reset(nodearray *a)
|
|
{
|
|
free(a->sparse);
|
|
nodearray_init(a);
|
|
}
|
|
|
|
static inline nodearray_sparse
|
|
nodearray_encode(unsigned key, nodearray_value value)
|
|
{
|
|
static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
|
|
return ((nodearray_sparse) key << 16) | value;
|
|
}
|
|
|
|
static inline unsigned
|
|
nodearray_sparse_key(const nodearray_sparse *elem)
|
|
{
|
|
static_assert(sizeof(nodearray_value) == sizeof(uint16_t), "sizes mismatch");
|
|
return *elem >> 16;
|
|
}
|
|
|
|
static inline nodearray_value
|
|
nodearray_sparse_value(const nodearray_sparse *elem)
|
|
{
|
|
return *elem & NODEARRAY_MAX_VALUE;
|
|
}
|
|
|
|
static inline unsigned
|
|
nodearray_sparse_search(const nodearray *a, nodearray_sparse key, nodearray_sparse **elem)
|
|
{
|
|
assert(nodearray_is_sparse(a) && a->size);
|
|
|
|
nodearray_sparse *data = a->sparse;
|
|
|
|
/* Encode the key using the highest possible value, so that the
|
|
* matching node must be encoded lower than this
|
|
*/
|
|
nodearray_sparse skey = nodearray_encode(key, NODEARRAY_MAX_VALUE);
|
|
|
|
unsigned left = 0;
|
|
unsigned right = a->size - 1;
|
|
|
|
if (data[right] <= skey)
|
|
left = right;
|
|
|
|
while (left != right) {
|
|
/* No need to worry about overflow, we couldn't have more than
|
|
* 2^24 elements */
|
|
unsigned probe = (left + right + 1) / 2;
|
|
|
|
if (data[probe] > skey)
|
|
right = probe - 1;
|
|
else
|
|
left = probe;
|
|
}
|
|
|
|
*elem = data + left;
|
|
return left;
|
|
}
|
|
|
|
static inline void
|
|
nodearray_orr(nodearray *a, unsigned key, nodearray_value value,
|
|
unsigned max_sparse, unsigned max)
|
|
{
|
|
assert(key < (1 << 24));
|
|
assert(key < max);
|
|
|
|
if (!value)
|
|
return;
|
|
|
|
if (nodearray_is_sparse(a)) {
|
|
unsigned size = a->size;
|
|
unsigned left = 0;
|
|
|
|
if (size) {
|
|
/* First, binary search for key */
|
|
nodearray_sparse *elem;
|
|
left = nodearray_sparse_search(a, key, &elem);
|
|
|
|
if (nodearray_sparse_key(elem) == key) {
|
|
*elem |= value;
|
|
return;
|
|
}
|
|
|
|
/* We insert before `left`, so increment it if it's
|
|
* out of order */
|
|
if (nodearray_sparse_key(elem) < key)
|
|
++left;
|
|
}
|
|
|
|
if (size < max_sparse && (size + 1) < max / 4) {
|
|
/* We didn't find it, but we know where to insert it. */
|
|
|
|
nodearray_sparse *data = a->sparse;
|
|
nodearray_sparse *data_move = data + left;
|
|
|
|
bool realloc = (++a->size) > a->sparse_capacity;
|
|
|
|
if (realloc) {
|
|
a->sparse_capacity = MIN2(MAX2(a->sparse_capacity * 2, 64), max / 4);
|
|
|
|
a->sparse = (nodearray_sparse *)malloc(a->sparse_capacity * sizeof(nodearray_sparse));
|
|
|
|
if (left)
|
|
memcpy(a->sparse, data, left * sizeof(nodearray_sparse));
|
|
}
|
|
|
|
nodearray_sparse *elem = a->sparse + left;
|
|
|
|
if (left != size)
|
|
memmove(elem + 1, data_move, (size - left) * sizeof(nodearray_sparse));
|
|
|
|
*elem = nodearray_encode(key, value);
|
|
|
|
if (realloc)
|
|
free(data);
|
|
|
|
return;
|
|
}
|
|
|
|
/* There are too many elements, so convert to a dense array */
|
|
nodearray old = *a;
|
|
|
|
a->dense = (nodearray_value *)calloc(NODEARRAY_DENSE_ALIGN(max), sizeof(nodearray_value));
|
|
a->size = max;
|
|
a->sparse_capacity = ~0U;
|
|
|
|
nodearray_value *data = a->dense;
|
|
|
|
nodearray_sparse_foreach(&old, x) {
|
|
unsigned key = nodearray_sparse_key(x);
|
|
nodearray_value value = nodearray_sparse_value(x);
|
|
|
|
assert(key < max);
|
|
data[key] = value;
|
|
}
|
|
|
|
free(old.sparse);
|
|
}
|
|
|
|
a->dense[key] |= value;
|
|
}
|
|
|
|
#ifdef __cplusplus
|
|
} /* extern C */
|
|
#endif
|
|
|
|
#endif
|