[Lua] Base64 Bit Buffer

Jan 23, 2015
So this is new: I needed some way to pack my data into a string, so I rewrote the Base64 from WurstStdlib2 in Lua and made it work like the Bit Buffer by @Aniki. I also took some time to optimize it, because in my maps I don't shy away from calling the Save function on every data update, so it had better take as little time as possible. No benchmarks; I just counted how many operations each step performs and tried to reduce that as much as I could. So, here you go:

if Debug then Debug.beginFile "Base64" end
--[[
    Base64 v2a

    Provides functionality to tightly pack data into Base64 strings, optimized to be as fast and dense as possible at the same time.
    Only storing numbers directly in Base64 octets would be faster; only a BigInteger string representation would be denser.


        Base64.Encoder.create() -> Encoder
            - Creates a new encoder instance

        Encoder:writeBitString(bitString: integer, bitLength: integer)
            - Appends up to 31 bits to the resulting data

        Encoder:buildString() -> string
            - Returns a string with the encoded data

        Base64.Decoder.create(data: string) -> Decoder
            - Creates a decoder instance that will read through the string

        Decoder:readBitString(bitLength: integer) -> integer
            - Reads up to 31 bits from the provided data

        Decoder.bit_len, Encoder.bit_len: integer
            - Running total of bits read / written; can be used to store and verify the bit length elsewhere

        Base64.Internal:
            CHARMAP - the default RFC 4648 compliant charmap
            REVERSE_CHARMAP - the inverse of the default charmap

            GenerateCharmap(charset: string, voidInt, setSize, i2ch, ch2i) -> Charmap, ReverseCharmap
                - Generates a charmap and its reverse for the given charset
                - Can be used to obfuscate the encoding, if you use it like so:
                    Base64.Internal.CHARMAP, Base64.Internal.REVERSE_CHARMAP = Base64.Internal.GenerateCharmap(
                        '<your own permutation of the 64 characters>', 0
                    )
                - To replicate the original encoding, use `Base64.Internal.GenerateCharmap('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/', 0)`

    Optional requirements:
        Total Initialization by Bribe                   @ https://www.hiveworkshop.com/threads/317099/
        DebugUtils by Eikonium                          @ https://www.hiveworkshop.com/threads/330758/

    Inspired by:
        - Base64 from WurstStdlib2                      @ https://github.com/wurstscript/WurstStdlib2/blob/master/wurst/file/Base64.wurst
        - Aniki's Base64 & BitBuf                       @ https://www.hiveworkshop.com/threads/288199/

    Updated: 12 May 2023
]]
OnInit("Base64", function()

    --[ INTERNAL ]--

    --- Encoding alphabet: 1-based index [1, 64] -> character.
    --- The character for a 6-bit value `v` is stored at index `v + 1`.
    local CHARMAP = {
        "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
        "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "a", "b", "c", "d", "e", "f",
        "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v",
        "w", "x", "y", "z", "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "+", "/"
    }

    --- Decoding table: byte code [1, 255] -> 6-bit value [0, 63].
    --- Byte codes that are not part of the alphabet map to 0.
    --- The table is 1-based, so byte code 0 has no entry.
    local REVERSE_CHARMAP = {
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62, 0, 0, 0, 63, 52,
        53, 54, 55, 56, 57, 58, 59, 60, 61, 0, 0, 0, 0, 0, 0, 0,
        0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
        16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 0, 0, 0, 0, 0, 0,
        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41,
        42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
    }

    --- Builds an encoding table and its matching decoding table from a charset.
    ---@param charset string characters in value order; the i-th character encodes the value i - 1
    ---@param voidInt? integer value stored for byte codes that are not in `charset` (default 0)
    ---@param setSize? integer byte codes 1..setSize are pre-filled with `voidInt` (default 255)
    ---@param i2ch? table table to fill with index -> character (a new table when omitted)
    ---@param ch2i? table table to fill with byte code -> value (a new table when omitted)
    ---@return table i2ch, table ch2i
    local function GenerateCharmap(charset, voidInt, setSize, i2ch, ch2i)
        i2ch = i2ch or {}
        ch2i = ch2i or {}
        voidInt = voidInt or 0
        setSize = setSize or 255
        -- Start from "everything is void", then overwrite the slots of real characters.
        for code = 1, setSize do
            ch2i[code] = voidInt
        end
        for pos = 1, #charset do
            i2ch[pos] = charset:sub(pos, pos)
            ch2i[charset:byte(pos)] = pos - 1
        end
        return i2ch, ch2i
    end

    --- Shared internals. The encoder and decoder look the charmaps up through this
    --- table on every octet, so reassigning `CHARMAP` / `REVERSE_CHARMAP` switches
    --- the alphabet used from then on.
    local Internal = {
        GenerateCharmap = GenerateCharmap,
        CHARMAP = CHARMAP,
        REVERSE_CHARMAP = REVERSE_CHARMAP,
    }

    --[ ENCODER CLASS ]--

    --- Encodes a sequence of arbitrarily long unsigned integers into a string.
    ---@class Encoder
    ---@field buffer integer bits not yet emitted as a full octet
    ---@field buffer_len integer number of pending bits held in `buffer`
    ---@field bit_len integer total number of bits written so far
    ---@field out table emitted characters, one entry per octet
    ---@field out_len integer number of entries in `out`
    local Encoder = {}
    Encoder.__index = Encoder

    --- Creates an empty encoder.
    ---@return Encoder
    function Encoder.create()
        return setmetatable({
            buffer = 0,
            buffer_len = 0,
            bit_len = 0,
            out = {},
            out_len = 0,
        }, Encoder)
    end

    --- Appends the character for the low 6 bits of `octet` to the encoder output.
    ---@param e Encoder
    ---@param octet integer only the low 6 bits are used; higher bits are masked off
    local function writeOctet(e, octet)
        local pos = e.out_len + 1
        e.out_len = pos
        -- CHARMAP is 1-based, hence the + 1.
        e.out[pos] = Internal.CHARMAP[(octet & 0x3f) + 1]
    end

    --- Appends the low `len` bits of `bstr` to the stream, least significant bit first.
    --- Every complete group of 6 bits is emitted as one character; the remainder is
    --- kept in `self.buffer` for the next call.
    --- Post-condition: buffer_len < 6, buffer <= 0x1f.
    --- `bstr` is NOT masked to `len` bits here (deliberately, for speed); passing a
    --- value outside the documented range leaks its extra bits into the stream.
    ---@param bstr integer should be in the range [0, 2^len - 1]
    ---@param len integer should be in the range [0, 31]
    function Encoder:writeBitString(bstr, len)
        self.bit_len = self.bit_len + len
        local held = self.buffer_len
        local buffer = self.buffer | (bstr << held)
        local buf_len = held + len

        -- Unrolled on purpose: one comparison per emitted octet, no loop bookkeeping.
        if buf_len < 6 then
            self.buffer, self.buffer_len = buffer, buf_len
            return
        end
        writeOctet(self, buffer)
        if buf_len < 12 then
            self.buffer, self.buffer_len = buffer >> 6, buf_len - 6
            return
        end
        writeOctet(self, buffer >> 6)
        if buf_len < 18 then
            self.buffer, self.buffer_len = buffer >> 12, buf_len - 12
            return
        end
        writeOctet(self, buffer >> 12)
        if buf_len < 24 then
            self.buffer, self.buffer_len = buffer >> 18, buf_len - 18
            return
        end
        writeOctet(self, buffer >> 18)
        if buf_len < 30 then
            self.buffer, self.buffer_len = buffer >> 24, buf_len - 24
            return
        end
        writeOctet(self, buffer >> 24)
        -- held <= 5 and len <= 31, so buf_len is at most 36 here.
        -- From this point the remaining bits are taken from `bstr` instead of the
        -- combined `buffer`, so nothing above bit 29 of `buffer` is needed.
        -- NOTE(review): presumably this keeps the encoder correct where integers
        -- are only 32 bits wide - confirm against the target runtime.
        if buf_len == 36 then
            -- Only reachable with held == 5 and len == 31: exactly one more full octet.
            writeOctet(self, bstr >> 25)
            self.buffer, self.buffer_len = 0, 0
        else
            self.buffer, self.buffer_len = bstr >> (30 - held), buf_len - 30
        end
    end

    --- Returns the encoded string for everything written so far.
    --- Pending bits that do not fill a whole octet are emitted as one extra
    --- character, unless they are all zero (the decoder reads zeros past the end
    --- of its input, so a zero tail decodes the same without it).
    --- The encoder state is left untouched: the call can be repeated with the same
    --- result and writing may continue afterwards.
    ---@return string
    function Encoder:buildString()
        local encoded = table.concat(self.out)
        if self.buffer > 0 then
            -- Append the partial octet to the result only, not to `self.out`;
            -- storing it there would duplicate it on the next call.
            encoded = encoded .. Internal.CHARMAP[(self.buffer & 0x3f) + 1]
        end
        return encoded
    end

    --[ DECODER CLASS ]--

    --- Decodes a sequence of arbitrarily long unsigned integers from an encoded string.
    ---@class Decoder
    ---@field buffer integer bits already read from the source but not yet returned
    ---@field buffer_len integer number of leftover bits held in `buffer`
    ---@field bit_len integer total number of bits requested so far
    ---@field pointer integer number of source characters consumed
    ---@field source string the encoded input
    local Decoder = {}
    Decoder.__index = Decoder

    --- Creates a decoder positioned at the start of `str`.
    ---@param str string encoded data
    ---@return Decoder
    function Decoder.create(str)
        return setmetatable({
            buffer = 0,
            buffer_len = 0,
            bit_len = 0,
            pointer = 0,
            source = str
        }, Decoder)
    end

    --- Reads the next character of the source and returns its 6-bit value.
    --- Once the source is exhausted it returns 0 without advancing.
    --- A byte with no entry in the reverse charmap (e.g. byte code 0, which a
    --- 1-based table cannot hold) also yields 0 instead of nil.
    ---@param e Decoder
    ---@return integer
    local function readOctet(e)
        local pos = e.pointer + 1
        if pos > #e.source then
            return 0
        end
        e.pointer = pos
        return Internal.REVERSE_CHARMAP[string.byte(e.source, pos)] or 0
    end

    --- Reads the next `len` bits from the stream and returns them as an integer.
    --- Mirrors Encoder:writeBitString: bits come back least significant first, and
    --- bits read from the source beyond `len` are kept in `self.buffer`.
    --- Post-condition: buffer_len < 6, buffer <= 0x1f.
    ---@param len integer should be in the range [0, 31]
    ---@return integer
    function Decoder:readBitString(len)
        self.bit_len = self.bit_len + len
        local buffer = 0
        -- Bits still needed from the source after using the leftovers; <= 0 means
        -- the leftover bits alone satisfy the request.
        local to_read = len - self.buffer_len

        -- Unrolled on purpose: one octet per 6 bits still missing.
        if to_read > 0 then
            buffer = buffer | (readOctet(self))
            if to_read > 6 then
                buffer = buffer | (readOctet(self) << 6)
                if to_read > 12 then
                    buffer = buffer | (readOctet(self) << 12)
                    if to_read > 18 then
                        buffer = buffer | (readOctet(self) << 18)
                        if to_read > 24 then
                            buffer = buffer | (readOctet(self) << 24)
                            if to_read == 31 then -- only happens when buffer_len == 0
                                -- A sixth octet supplies bit 30; its other five
                                -- bits become the new leftover.
                                local b = readOctet(self)
                                buffer = buffer | ((b & 1) << 30)
                                self.buffer, self.buffer_len = b >> 1, 5
                                return buffer
                            end
                        end
                    end
                end
            end
        end

        -- Leftover bits are the low bits, freshly read octets sit above them.
        -- This can span up to 35 bits (5 leftover + 30 fresh).
        -- NOTE(review): that relies on integers wider than 32 bits - confirm for
        -- the target runtime.
        buffer = self.buffer | (buffer << self.buffer_len)
        local value = buffer & ((1 << len) - 1)
        -- New leftover count = 6 * (octets read) - to_read; the floor division makes
        -- the same formula give buffer_len - len when nothing was read (to_read <= 0).
        self.buffer, self.buffer_len = buffer >> len, ((to_read - 1) // 6 + 1) * 6 - to_read
        return value
    end

    --- Public API, deliberately exported as a global.
    Base64 = {
        Encoder = Encoder,
        Decoder = Decoder,
        Internal = Internal,
    }
end) -- closes the OnInit("Base64", function() wrapper
if Debug then Debug.endFile() end

The writes and the reads are in the same order.
--- Round-trip smoke test: writes four bit strings, then reads them back in the
--- same order and checks that each value and the total bit count match.
local function test()
    local e = Base64.Encoder.create()
    e:writeBitString(0x1234567, 28)
    e:writeBitString(0xf, 4)
    e:writeBitString(0x1234567, 28)
    e:writeBitString(0x1234567, 28)
    local s = e:buildString()

    local d = Base64.Decoder.create(s)
    assert(d:readBitString(28) == 0x1234567)
    assert(d:readBitString(4) == 0xf)
    assert(d:readBitString(28) == 0x1234567)
    assert(d:readBitString(28) == 0x1234567)
    assert(d.bit_len == e.bit_len)
end

--- Randomised round-trip test using a shuffled alphabet.
--- Generates `dataset_length` random (length, value) pairs, encodes them, decodes
--- them again and prints a line for every mismatch. The dataset is printed before
--- encoding and after decoding so the two lists can be compared by eye.
--- Note: the shuffled charmap stays installed in Base64.Internal afterwards.
---@param dataset_length integer number of random values to round-trip
local function test_complex(dataset_length)
    local dataset = {}
    local dataset_display = {}
    for _ = 1, dataset_length do
        local len = math.random(31)
        local value = math.random(1 << len) - 1
        dataset[#dataset + 1] = {
            len = len,
            value = value
        }
        dataset_display[#dataset_display + 1] = ("%d:0x%x"):format(len, value)
    end

    print(table.concat(dataset_display, ', '))

    -- GenerateCharmap is local to the library; outside it is only reachable
    -- through Base64.Internal.
    Base64.Internal.CHARMAP, Base64.Internal.REVERSE_CHARMAP = Base64.Internal.GenerateCharmap(
        "yJ3uFjRLC0h5NTSMYHm27WOGUI/Q+9rKiDPdagtsABpxlwoce48nkzXqvE1ZbfV6", 0
    )

    local e = Base64.Encoder.create()
    for _, data in ipairs(dataset) do
        e:writeBitString(data.value, data.len)
    end
    local s = e:buildString()
    print(e.bit_len, s)

    dataset_display = {}

    local d = Base64.Decoder.create(s)
    for i, data in ipairs(dataset) do
        local value = d:readBitString(data.len)
        if value ~= data.value then
            print(('Error at pos %d, len %d expected 0x%x got 0x%x'):format(i, data.len, data.value, value))
        end
        dataset_display[#dataset_display + 1] = ("%d:0x%x"):format(data.len, value)
    end

    print(table.concat(dataset_display, ', '))
end
Side note: if you paste it to World Editor, escape all % symbols with another %, or it'll crash unapologetically.

