You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
518 lines
16 KiB
518 lines
16 KiB
#ifndef GALGORITHM_H
|
|
#define GALGORITHM_H
|
|
|
|
/*
 * Generalized algorithms based on gheap for C99.
 *
 * Don't forget to pass the -DNDEBUG option to the compiler when creating
 * optimized builds. This significantly speeds up gheap code by removing debug
 * assertions.
 *
 * Author: Aliaksandr Valialkin <valyala@gmail.com>.
 */
|
|
|
|
|
|
/*******************************************************************************
|
|
* Interface.
|
|
******************************************************************************/
|
|
|
|
#include "gheap.h" /* for gheap_ctx */
|
|
|
|
#include <stddef.h> /* for size_t */
|
|
|
|
/*
 * Heapsort.
 *
 * Rearranges the n items [base[0] ... base[n-1]] into ascending order.
 * ctx is the gheap context (see gheap.h) describing the items.
 */
static inline void galgorithm_heapsort(const struct gheap_ctx *ctx,
    void *base, size_t n);
|
|
|
|
/*
 * Partial sort of [base[0] ... base[n-1]].
 *
 * On return [base[0] ... base[middle_index-1]] holds items in ascending
 * order, and these items are smaller than the rest of the items, which are
 * left in [base[middle_index] ... base[n-1]].
 *
 * middle_index must not exceed n.
 */
static inline void galgorithm_partial_sort(const struct gheap_ctx *ctx,
    void *base, size_t n, size_t middle_index);
|
|
|
|
/*
 * Operations of an input iterator consumed by galgorithm_nway_merge().
 *
 * Both callbacks receive one of the opaque iterator contexts stored
 * in struct galgorithm_nway_merge_input.
 */
struct galgorithm_nway_merge_input_vtable
{
  /*
   * Moves the iterator to its next item.
   *
   * Returns non-zero if the iterator now points to an item, or 0 if the
   * input is exhausted. Once 0 has been returned, galgorithm never calls
   * next() for this iterator again.
   */
  int (*next)(void *ctx);

  /*
   * Returns a pointer to the item the iterator currently points to.
   *
   * Galgorithm never calls get() for an iterator whose next() returned 0.
   */
  const void *(*get)(const void *ctx);
};
|
|
|
|
/*
|
|
* A collection of input iterators, which is passed to galgorithm_nway_merge().
|
|
*/
|
|
struct galgorithm_nway_merge_input
|
|
{
|
|
const struct galgorithm_nway_merge_input_vtable *vtable;
|
|
|
|
/*
|
|
* An array of opaque contexts, which are passed to vtable functions.
|
|
* Each context represents a single input iterator.
|
|
* Contextes must contain data reqired for fetching items from distinct
|
|
* input iterators.
|
|
*
|
|
* Contextes in this array can be shuffled using ctx_mover.
|
|
*/
|
|
void *ctxs;
|
|
|
|
/* The number of contextes. */
|
|
size_t ctxs_count;
|
|
|
|
/* The size of each context object. */
|
|
size_t ctx_size;
|
|
|
|
/* Is used for shuffling context objects. */
|
|
gheap_item_mover_t ctx_mover;
|
|
};
|
|
|
|
/*
 * Operations of the output iterator consumed by galgorithm_nway_merge().
 */
struct galgorithm_nway_merge_output_vtable
{
  /*
   * Stores the item pointed to by data into the output and moves the output
   * iterator to the next position.
   */
  void (*put)(void *ctx, const void *data);
};
|
|
|
|
/*
 * The output iterator handed to galgorithm_nway_merge().
 */
struct galgorithm_nway_merge_output
{
  /* Callbacks of the output iterator. */
  const struct galgorithm_nway_merge_output_vtable *vtable;

  /*
   * Opaque context passed to the vtable functions. It must hold the data
   * the output iterator needs in order to work.
   */
  void *ctx;
};
|
|
|
|
/*
 * N-way merge.
 *
 * Merges all the given inputs into the output, which receives the items
 * in ascending order. Items are compared with ctx->less_comparer.
 *
 * Requirements: every input must contain at least one item, and the items
 * of every input must already be sorted in ascending order.
 *
 * Side effect: the input contexts get shuffled.
 */
static inline void galgorithm_nway_merge(const struct gheap_ctx *ctx,
    const struct galgorithm_nway_merge_input *input,
    const struct galgorithm_nway_merge_output *output);
|
|
|
|
/*
 * Callback type used by galgorithm_nway_mergesort() for sorting small ranges.
 *
 * The callback must sort [base[0] ... base[n-1]]. Its ctx argument is the
 * small_range_sorter_ctx value given to galgorithm_nway_mergesort().
 */
typedef void (*galgorithm_nway_mergesort_small_range_sorter_t)(
    const void *ctx, void *base, size_t n);
|
|
|
|
/*
|
|
* Performs n-way mergesort for [base[0] ... base[range_size-1]] items.
|
|
*
|
|
* Uses small_range_sorter for sorting ranges containing no more
|
|
* than small_range_size items.
|
|
*
|
|
* Splits the input range into subranges with small_range_size size,
|
|
* sorts them using small_range_sorter and then merges them back
|
|
* using n-way merge with n = subranges_count.
|
|
*
|
|
* items_tmp_buf must point to an uninitialized memory, which can hold
|
|
* up to range_size items.
|
|
*/
|
|
static inline void galgorithm_nway_mergesort(const struct gheap_ctx *ctx,
|
|
void *base, size_t range_size,
|
|
galgorithm_nway_mergesort_small_range_sorter_t small_range_sorter,
|
|
const void *small_range_sorter_ctx,
|
|
size_t small_range_size, size_t subranges_count, void *items_tmp_buf);
|
|
|
|
|
|
/*******************************************************************************
 * Implementation.
 *
 * Define all functions inline, so the compiler is able to optimize out common
 * args (fanout, page_chunks, item_size, less_comparer and item_mover),
 * which are usually constants, using constant folding optimization
 * ( http://en.wikipedia.org/wiki/Constant_folding ).
 *****************************************************************************/
|
|
|
|
#include "gheap.h" /* for gheap_* stuff */
|
|
|
|
#include <assert.h> /* for assert */
|
|
#include <stddef.h> /* for size_t */
|
|
#include <stdint.h> /* for uintptr_t, SIZE_MAX and UINTPTR_MAX */
|
|
#include <stdlib.h> /* for malloc(), free() */
|
|
|
|
/* Returns a pointer to base[index]. */
|
|
static inline void *_galgorithm_get_item_ptr(
|
|
const struct gheap_ctx *const ctx,
|
|
const void *const base, const size_t index)
|
|
{
|
|
const size_t item_size = ctx->item_size;
|
|
|
|
assert(index <= SIZE_MAX / item_size);
|
|
|
|
const size_t offset = item_size * index;
|
|
assert((uintptr_t)base <= UINTPTR_MAX - offset);
|
|
|
|
return ((char *)base) + offset;
|
|
}
|
|
|
|
/* Swaps items with given indexes */
|
|
static inline void _galgorithm_swap_items(const struct gheap_ctx *const ctx,
|
|
const void *const base, const size_t a_index, const size_t b_index)
|
|
{
|
|
const size_t item_size = ctx->item_size;
|
|
const gheap_item_mover_t item_mover = ctx->item_mover;
|
|
|
|
char tmp[item_size];
|
|
void *const a = _galgorithm_get_item_ptr(ctx, base, a_index);
|
|
void *const b = _galgorithm_get_item_ptr(ctx, base, b_index);
|
|
item_mover(tmp, a);
|
|
item_mover(a, b);
|
|
item_mover(b, tmp);
|
|
}
|
|
|
|
/*
 * Heapsort of [base[0] ... base[n-1]]: builds a heap out of the items
 * and then sorts that heap, both via gheap.
 */
static inline void galgorithm_heapsort(const struct gheap_ctx *const ctx,
    void *const base, const size_t n)
{
  /* Phase 1: build the heap in place. */
  gheap_make_heap(ctx, base, n);

  /* Phase 2: convert the heap into the sorted sequence. */
  gheap_sort_heap(ctx, base, n);
}
|
|
|
|
static inline void galgorithm_partial_sort(const struct gheap_ctx *const ctx,
|
|
void *const base, const size_t n, const size_t middle_index)
|
|
{
|
|
assert(middle_index <= n);
|
|
|
|
if (middle_index > 0) {
|
|
gheap_make_heap(ctx, base, middle_index);
|
|
|
|
const gheap_less_comparer_t less_comparer = ctx->less_comparer;
|
|
const void *const less_comparer_ctx = ctx->less_comparer_ctx;
|
|
|
|
for (size_t i = middle_index; i < n; ++i) {
|
|
void *const tmp = _galgorithm_get_item_ptr(ctx, base, i);
|
|
if (less_comparer(less_comparer_ctx, tmp, base)) {
|
|
gheap_swap_max_item(ctx, base, middle_index, tmp);
|
|
}
|
|
}
|
|
|
|
gheap_sort_heap(ctx, base, middle_index);
|
|
}
|
|
}
|
|
|
|
struct _galgorithm_nway_merge_less_comparer_ctx
|
|
{
|
|
gheap_less_comparer_t less_comparer;
|
|
const void *less_comparer_ctx;
|
|
const struct galgorithm_nway_merge_input_vtable *vtable;
|
|
};
|
|
|
|
static inline int _galgorithm_nway_merge_less_comparer(const void *const ctx,
|
|
const void *const a, const void *const b)
|
|
{
|
|
const struct _galgorithm_nway_merge_less_comparer_ctx *const c = ctx;
|
|
const gheap_less_comparer_t less_comparer = c->less_comparer;
|
|
const void *const less_comparer_ctx = c->less_comparer_ctx;
|
|
const struct galgorithm_nway_merge_input_vtable *const vtable = c->vtable;
|
|
|
|
return less_comparer(less_comparer_ctx, vtable->get(b), vtable->get(a));
|
|
}
|
|
|
|
static inline void galgorithm_nway_merge(const struct gheap_ctx *const ctx,
|
|
const struct galgorithm_nway_merge_input *const input,
|
|
const struct galgorithm_nway_merge_output *const output)
|
|
{
|
|
void *const top_input = input->ctxs;
|
|
size_t inputs_count = input->ctxs_count;
|
|
|
|
assert(inputs_count > 0);
|
|
|
|
const struct _galgorithm_nway_merge_less_comparer_ctx less_comparer_ctx = {
|
|
.less_comparer = ctx->less_comparer,
|
|
.less_comparer_ctx = ctx->less_comparer_ctx,
|
|
.vtable = input->vtable,
|
|
};
|
|
const struct gheap_ctx nway_ctx = {
|
|
.fanout = ctx->fanout,
|
|
.page_chunks = ctx->page_chunks,
|
|
.item_size = input->ctx_size,
|
|
.less_comparer = &_galgorithm_nway_merge_less_comparer,
|
|
.less_comparer_ctx = &less_comparer_ctx,
|
|
.item_mover = input->ctx_mover,
|
|
};
|
|
|
|
gheap_make_heap(&nway_ctx, top_input, inputs_count);
|
|
while (1) {
|
|
const void *const data = input->vtable->get(top_input);
|
|
output->vtable->put(output->ctx, data);
|
|
if (!input->vtable->next(top_input)) {
|
|
--inputs_count;
|
|
if (inputs_count == 0) {
|
|
break;
|
|
}
|
|
_galgorithm_swap_items(&nway_ctx, top_input, 0, inputs_count);
|
|
}
|
|
gheap_restore_heap_after_item_decrease(&nway_ctx, top_input,
|
|
inputs_count, 0);
|
|
}
|
|
}
|
|
|
|
static inline void _galgorithm_move_items(const struct gheap_ctx *const ctx,
|
|
void *const src, const size_t n, void *const dst)
|
|
{
|
|
const gheap_item_mover_t item_mover = ctx->item_mover;
|
|
|
|
for (size_t i = 0; i < n; ++i) {
|
|
item_mover(
|
|
_galgorithm_get_item_ptr(ctx, dst, i),
|
|
_galgorithm_get_item_ptr(ctx, src, i));
|
|
}
|
|
}
|
|
|
|
static inline void _galgorithm_sort_subranges(const struct gheap_ctx *const ctx,
|
|
void *const base, const size_t range_size,
|
|
const galgorithm_nway_mergesort_small_range_sorter_t small_range_sorter,
|
|
const void *const small_range_sorter_ctx,
|
|
const size_t small_range_size)
|
|
{
|
|
assert(small_range_size > 0);
|
|
|
|
const size_t last_full_range = range_size - range_size % small_range_size;
|
|
for (size_t i = 0; i != last_full_range; i += small_range_size) {
|
|
small_range_sorter(small_range_sorter_ctx,
|
|
_galgorithm_get_item_ptr(ctx, base, i), small_range_size);
|
|
}
|
|
|
|
/* Sort the last subrange, which contains less than small_range_size items. */
|
|
if (last_full_range < range_size) {
|
|
small_range_sorter(small_range_sorter_ctx,
|
|
_galgorithm_get_item_ptr(ctx, base, last_full_range),
|
|
range_size - last_full_range);
|
|
}
|
|
}
|
|
|
|
/*
 * Input iterator over a subrange of items, which is used by
 * galgorithm_nway_mergesort().
 */
struct _galgorithm_nway_mergesort_input_ctx
{
  /* Gheap context; supplies the item size for stepping over items. */
  const struct gheap_ctx *ctx;

  /* The current item of the subrange. */
  const void *next;

  /* The position right after the last item of the subrange. */
  const void *last;
};
|
|
|
|
static inline int _galgorithm_nway_mergesort_input_next(void *ctx)
|
|
{
|
|
struct _galgorithm_nway_mergesort_input_ctx *const c = ctx;
|
|
|
|
assert(c->next < c->last);
|
|
c->next = _galgorithm_get_item_ptr(c->ctx, c->next, 1);
|
|
assert(c->next <= c->last);
|
|
return (c->next < c->last);
|
|
}
|
|
|
|
static inline const void *_galgorithm_nway_mergesort_input_get(const void *ctx)
|
|
{
|
|
const struct _galgorithm_nway_mergesort_input_ctx *const c = ctx;
|
|
|
|
assert(c->next < c->last);
|
|
return c->next;
|
|
}
|
|
|
|
static const struct galgorithm_nway_merge_input_vtable
|
|
_galgorithm_nway_mergesort_input_vtable = {
|
|
.next = &_galgorithm_nway_mergesort_input_next,
|
|
.get = &_galgorithm_nway_mergesort_input_get,
|
|
};
|
|
|
|
/*
 * Output iterator used by galgorithm_nway_mergesort().
 */
struct _galgorithm_nway_mergesort_output_ctx
{
  /* Gheap context; supplies the item mover and the item size. */
  const struct gheap_ctx *ctx;

  /* The slot which receives the next item. */
  void *next;
};
|
|
|
|
static inline void _galgorithm_nway_mergesort_output_put(void *ctx,
|
|
const void *data)
|
|
{
|
|
struct _galgorithm_nway_mergesort_output_ctx *const c = ctx;
|
|
const gheap_item_mover_t item_mover = c->ctx->item_mover;
|
|
|
|
item_mover(c->next, data);
|
|
c->next = _galgorithm_get_item_ptr(c->ctx, c->next, 1);
|
|
}
|
|
|
|
static const struct galgorithm_nway_merge_output_vtable
|
|
_galgorithm_nway_mergesort_output_vtable = {
|
|
.put = &_galgorithm_nway_mergesort_output_put,
|
|
};
|
|
|
|
static inline void _galgorithm_merge_subrange_tuples(
|
|
const struct gheap_ctx *const ctx, void *const base, const size_t range_size,
|
|
struct galgorithm_nway_merge_input *const input,
|
|
const struct galgorithm_nway_merge_output *const output,
|
|
const size_t subranges_count, const size_t subrange_size)
|
|
{
|
|
assert(subranges_count > 1);
|
|
assert(subrange_size > 0);
|
|
|
|
struct _galgorithm_nway_mergesort_input_ctx *const input_ctxs = input->ctxs;
|
|
input->ctxs_count = subranges_count;
|
|
|
|
size_t i = 0;
|
|
|
|
/*
|
|
* Merge full subrange tuples. Each full subrange tuple contains
|
|
* subranges_count full subranges. Each full subrange contains
|
|
* subrange_size items.
|
|
*/
|
|
if (subrange_size <= range_size / subranges_count) {
|
|
const size_t tuple_size = subrange_size * subranges_count;
|
|
const size_t last_full_tuple = range_size - range_size % tuple_size;
|
|
|
|
while (i != last_full_tuple) {
|
|
for (size_t j = 0; j < subranges_count; ++j) {
|
|
input_ctxs[j].next = _galgorithm_get_item_ptr(ctx, base, i);
|
|
i += subrange_size;
|
|
input_ctxs[j].last = _galgorithm_get_item_ptr(ctx, base, i);
|
|
}
|
|
|
|
galgorithm_nway_merge(ctx, input, output);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Merge tail subrange tuple. Tail subrange tuple contains less than
|
|
* subranges_count full subranges. It also may contain tail subrange
|
|
* with less than subrange_size items.
|
|
*/
|
|
const size_t tail_tuple_size = range_size - i;
|
|
if (tail_tuple_size > 0) {
|
|
const size_t full_subranges_count = tail_tuple_size / subrange_size;
|
|
assert(full_subranges_count < subranges_count);
|
|
size_t tail_subranges_count = full_subranges_count;
|
|
|
|
for (size_t j = 0; j < full_subranges_count; ++j) {
|
|
input_ctxs[j].next = _galgorithm_get_item_ptr(ctx, base, i);
|
|
i += subrange_size;
|
|
input_ctxs[j].last = _galgorithm_get_item_ptr(ctx, base, i);
|
|
}
|
|
|
|
if (i < range_size) {
|
|
input_ctxs[full_subranges_count].next =
|
|
_galgorithm_get_item_ptr(ctx, base, i);
|
|
input_ctxs[full_subranges_count].last =
|
|
_galgorithm_get_item_ptr(ctx, base, range_size);
|
|
++tail_subranges_count;
|
|
}
|
|
|
|
input->ctxs_count = tail_subranges_count;
|
|
galgorithm_nway_merge(ctx, input, output);
|
|
}
|
|
}
|
|
|
|
static inline void _galgorithm_nway_mergesort_input_ctx_mover(void *dst,
|
|
const void *src)
|
|
{
|
|
*(struct _galgorithm_nway_mergesort_input_ctx *)dst =
|
|
*(struct _galgorithm_nway_mergesort_input_ctx *)src;
|
|
}
|
|
|
|
static inline void galgorithm_nway_mergesort(const struct gheap_ctx *const ctx,
|
|
void *const base, const size_t range_size,
|
|
const galgorithm_nway_mergesort_small_range_sorter_t small_range_sorter,
|
|
const void *const small_range_sorter_ctx,
|
|
const size_t small_range_size, const size_t subranges_count,
|
|
void *const items_tmp_buf)
|
|
{
|
|
assert(small_range_size > 0);
|
|
assert(subranges_count > 1);
|
|
|
|
/* Preparation: Move items to a temporary buffer. */
|
|
_galgorithm_move_items(ctx, base, range_size, items_tmp_buf);
|
|
|
|
/*
|
|
* Step 1: split the range into subranges with small_range_size size each
|
|
* (except the last subrange, which may contain less than small_range_size
|
|
* items) and sort each of these subranges using small_range_sorter.
|
|
*/
|
|
_galgorithm_sort_subranges(ctx, items_tmp_buf, range_size,
|
|
small_range_sorter, small_range_sorter_ctx, small_range_size);
|
|
|
|
/* Step 2: Merge subranges sorted at the previous step using n-way merge. */
|
|
struct _galgorithm_nway_mergesort_input_ctx *const input_ctxs =
|
|
malloc(sizeof(input_ctxs[0]) * subranges_count);
|
|
for (size_t i = 0; i < subranges_count; ++i) {
|
|
input_ctxs[i].ctx = ctx;
|
|
}
|
|
|
|
struct galgorithm_nway_merge_input input = {
|
|
.vtable = &_galgorithm_nway_mergesort_input_vtable,
|
|
.ctxs = input_ctxs,
|
|
.ctxs_count = subranges_count,
|
|
.ctx_size = sizeof(input_ctxs[0]),
|
|
.ctx_mover = &_galgorithm_nway_mergesort_input_ctx_mover,
|
|
};
|
|
|
|
struct _galgorithm_nway_mergesort_output_ctx output_ctx;
|
|
output_ctx.ctx = ctx;
|
|
|
|
const struct galgorithm_nway_merge_output output = {
|
|
.vtable = &_galgorithm_nway_mergesort_output_vtable,
|
|
.ctx = &output_ctx,
|
|
};
|
|
|
|
size_t subrange_size = small_range_size;
|
|
for (;;) {
|
|
/*
|
|
* First pass: merge items from the temporary buffer
|
|
* to the original location.
|
|
*/
|
|
output_ctx.next = base;
|
|
_galgorithm_merge_subrange_tuples(ctx, items_tmp_buf, range_size,
|
|
&input, &output, subranges_count, subrange_size);
|
|
|
|
if (subrange_size > range_size / subranges_count) {
|
|
break;
|
|
}
|
|
subrange_size *= subranges_count;
|
|
|
|
/*
|
|
* Second pass: merge items from the original location
|
|
* to the temporary buffer.
|
|
*/
|
|
output_ctx.next = items_tmp_buf;
|
|
_galgorithm_merge_subrange_tuples(ctx, base, range_size,
|
|
&input, &output, subranges_count, subrange_size);
|
|
|
|
if (subrange_size > range_size / subranges_count) {
|
|
/* Move items from the temporary buffer to the original location. */
|
|
_galgorithm_move_items(ctx, items_tmp_buf, range_size, base);
|
|
break;
|
|
}
|
|
subrange_size *= subranges_count;
|
|
}
|
|
|
|
free(input_ctxs);
|
|
}
|
|
|
|
|
|
|
|
#endif
|
|
|