There have been a few cases recently where I have needed to keep track of an array of bits. Basically as a record to mark off when sections of something have completed. The simplest scenario is for something like a torrent file transfer. Let's say the file is made up of 100 chunks, which we can keep track of each completed chunk which was written by marking it in our bit buffer.

For example, our buffer "00000000". When the third item completes we can say "00100000", and so on. The simplest way to do this in C# would be to make an array of Booleans. This seemed like a total waste of memory to me, since Booleans in c# take up much more space than a single bit.

raw

public class ByteBitBuffer
{
    /// <summary>
    /// The binary buffer.
    /// </summary>
    private readonly byte[] bytes;

    /// <summary>
    /// Initializes a new instance of the <see cref="ByteBitBuffer"/> class.
    /// </summary>
    /// <param name="length">The length of the buffer.</param>
    public ByteBitBuffer(int length)
    {
        var len = length / 8;
        if(length % 8 != 0)
        {
            len++;
        }

        this.bytes = new byte[len];
    }

    /// <summary>
    /// Sets the value at an index.
    /// </summary>
    /// <param name="index">The index.</param>
    /// <param name="value">The value.</param>
    public void Set(int index, bool value)
    {
        byte mask = (byte)(1 << (index % 8));
        if(value)
        {
            this.bytes[index / 8] |= mask;
        }
        else
        {
            this.bytes[index / 8] &= (byte)~mask;
        }
    }

    /// <summary>
    /// Gets the value at an index.
    /// </summary>
    /// <param name="index">The index.</param>
    /// <returns>The value.</returns>
    public bool Get(int index)
    {
        return ((this.bytes[index / 8] >> (index % 8)) & 1) != 0; 
    }
}

This is what I ended up with. From a programmers perspective, this is great. I got to do everything that I love: bit manipulation and cramming as much data into as little space as possible. The real question is, how fast is this compared to the basic Boolean[] implementation.

The Boolean implementation is much simpler of course.

raw

public class BitBufferBools
{
    /// <summary>
    /// The boolean array.
    /// </summary>
    private readonly bool[] bools;

    /// <summary>
    /// Initializes a new instance of the <see cref="BitBufferBools"/> class.
    /// </summary>
    /// <param name="length">The length of the buffer.</param>
    public BitBufferBools(int length)
    {
        this.bools = new bool[length];
    }

    /// <summary>
    /// Sets the value at the index.
    /// </summary>
    /// <param name="index">The index.</param>
    /// <param name="value">The value.</param>
    public void Set(int index, bool value)
    {
        this.bools[index] = value;
    }

    /// <summary>
    /// Gets the value at the index.
    /// </summary>
    /// <param name="index">The index.</param>
    /// <returns>The value.</returns>
    public bool Get(int index)
    {
        return this.bools[index];
    }
}

C# also provides a class by default which can do the same thing as the two previous implementations named "System.Collections.BitArray".

My test harness for the code:

raw

static void Main(string[] args)
{
    // Run each method once to make sure they are all compiled
    Program.TestBitArray(1, 100);
    Program.TestBooleanArray(1, 100);
    Program.TestByteArray(1, 100);
    Program.TestIntArray(1, 100);
    Program.TestLongArray(1, 100);

    // Get the test values
    Console.Write("Iterations? ");
    var iterations = int.Parse(Console.ReadLine());
    Console.Write("Length? ");
    var length = int.Parse(Console.ReadLine());

    Console.WriteLine($"Performing tests with {iterations} iterations and length {length}.");

    // Test the boolean array
    Program.TestHarness("An array of bools", iterations, length, Program.TestBooleanArray);

    // Test System.Collection.BitArray
    Program.TestHarness("System.Collections.BitArray", iterations, length, Program.TestBitArray);

    // Test custom binary implementations
    Program.TestHarness("byte[]", iterations, length, Program.TestByteArray);
    Program.TestHarness("uint[]", iterations, length, Program.TestIntArray);
    Program.TestHarness("ulong[]", iterations, length, Program.TestLongArray);

    Console.ReadLine();
}

/// <summary>
/// The test harness.
/// </summary>
/// <param name="testName">The name of the test.</param>
/// <param name="iterations">The number of iterations.</param>
/// <param name="length">The length of the bit array.</param>
/// <param name="testAction">The test action.</param>
private static void TestHarness(string testName, int iterations, int length, Action<int, int> testAction)
{
    var watch = Stopwatch.StartNew();

    testAction(iterations, length);

    watch.Stop();
    Console.WriteLine($"Finished test: {testName} -> {watch.ElapsedMilliseconds}ms");
}

/// <summary>
/// Under the hood, just an array of bools.
/// </summary>
private static void TestBooleanArray(int iterations, int length)
{
    var bools = new BitBufferBools(length);

    for(var i = 0; i < iterations; i++)
    {
        for (var l = 0; l < length; l++)
        {
            bools.Set(l, !bools.Get(length - l - 1));
        }
    }
}

/// <summary>
/// Under the hood, a System.Collections.BitArray
/// </summary>
private static void TestBitArray(int iterations, int length)
{
    var bits = new BitArray(length);

    for (var i = 0; i < iterations; i++)
    {
        for (var l = 0; l < length; l++)
        {
            bits.Set(l, !bits.Get(length - l - 1));
        }
    }
}

/// <summary>
/// Under the hood, a byte[]
/// </summary>
private static void TestByteArray(int iterations, int length)
{
    var bits = new ByteBitBuffer(length);

    for (var i = 0; i < iterations; i++)
    {
        for (var l = 0; l < length; l++)
        {
            bits.Set(l, !bits.Get(length - l - 1));
        }
    }
}

/// <summary>
/// Under the hood, a uint[]
/// </summary>
private static void TestIntArray(int iterations, int length)
{
    var bits = new IntBitBuffer(length);

    for (var i = 0; i < iterations; i++)
    {
        for (var l = 0; l < length; l++)
        {
            bits.Set(l, !bits.Get(length - l - 1));
        }
    }
}

/// <summary>
/// Under the hood, a ulong[]
/// </summary>
private static void TestLongArray(int iterations, int length)
{
    var bits = new LongBitBuffer(length);

    for (var i = 0; i < iterations; i++)
    {
        for (var l = 0; l < length; l++)
        {
            bits.Set(l, !bits.Get(length - l - 1));
        }
    }
}

Here are the results, needless to say I was a little disappointed. But as it turns out, all of the bit manipulations are pretty expensive. So what you do save in space, you lose dearly in performance.

raw

// Iterations? 1000
// Length? 1000000
// Performing tests with 1000 iterations and length 1000000.
// Finished test: An array of bools -> 1255ms
// Finished test: System.Collections.BitArray -> 8127ms
// Finished test: byte[] -> 6462ms
// Finished test: uint[] -> 5609ms
// Finished test: ulong[] -> 5531ms

Since I was a little annoyed, I thought I would try one more thing. Maybe I could improve performance significantly by allocating my custom collections directly onto the stack.

raw

private static unsafe void TestStackAllocInts(int iterations, int length)
{
    var len = length / 32;
    if (length % 32 != 0)
    {
        len++;
    }

    uint * bits = stackalloc uint[len];

    for (var i = 0; i < iterations; i++)
    {
        for (var l = 0; l < length; l++)
        {
            PointerBitBuffer.Set(bits, l, !PointerBitBuffer.Get(bits, length - l - 1));
        }
    }
}

public static unsafe class PointerBitBuffer
{
    public static void Set(uint * self, int index, bool value)
    {
        var mask = 1U << (index % 32);
        if (value)
        {
            self[index / 32] |= mask;
        }
        else
        {
            self[index / 32] &= ~mask;
        }
    }

    public static bool Get(uint* self, int index)
    {
        return ((self[index / 32] >> (index % 32)) & 1) != 0;
    }
}

This did improve performance a little. But sadly, allocating the Boolean[] directly onto the stack is an even larger performance increase.

raw

private static unsafe void TestStackAllocBools(int iterations, int length)
{
    bool* bits = stackalloc bool[length];

    for (var i = 0; i < iterations; i++)
    {
        for (var l = 0; l < length; l++)
        {
            bits[i] = !bits[length - l - 1];
        }
    }
}

So here are the final results:

raw

// Iterations? 1000
// Length? 1000000
// Performing tests with 1000 iterations and length 1000000.
// Finished test: An array of bools -> 1255ms
// Finished test: System.Collections.BitArray -> 8127ms
// Finished test: byte[] -> 6462ms
// Finished test: uint[] -> 5609ms
// Finished test: ulong[] -> 5531ms
// Finished test: stackalloc uint[] -> 4102ms
// Finished test: stackalloc ulong[] -> 5106ms
// Finished test: stackalloc bool[] -> 723ms

The moral of the story is, C# is incredibly fast with basic arrays. So if space is no constraint, just use a basic Boolean array to store a bit buffer, unless you actually need the ability to write it directly to binary.