• 🏆 Texturing Contest #33 is OPEN! Contestants must re-texture a SD unit model found in-game (Warcraft 3 Classic), recreating the unit into a peaceful NPC version. 🔗Click here to enter!
  • It's time for the first HD Modeling Contest of 2024. Join the theme discussion for Hive's HD Modeling Contest #6! Click here to post your idea!

[Lua] Base64 Bit Buffer

Level 8
Joined
Jan 23, 2015
Messages
121
This is new: I needed a way to pack my data into a string, so I rewrote the Base64 module from WurstStdLib2 in Lua and made it work like the Bit Buffer by @Aniki. I also took some time to optimize it, because in my maps I don't shy away from calling the Save function on every data update, so it had better take as little time as possible. There are no benchmarks; I just counted how many operations each step performs and tried to reduce that as much as I could. So, here you go:

Lua:
-- Optional DebugUtils hook: this only runs when a global `Debug` exists.
-- NOTE(review): presumably marks where the "Base64" file begins so that error
-- messages can be mapped back to it -- confirm against DebugUtils.
if Debug then Debug.beginFile "Base64" end
--[[
    Base64 v2a

    Provides functionality to tightly pack data into Base64 strings, optimized to be as fast and dense as possible at the same time.
    Faster would only be to store numbers in B64 Octets directly, denser would only be to use BigInteger string representation.

    API:

        Base64.Encoder.create() -> Encoder
            - Creates a new encoder instance

        Encoder:writeBitString(bitString: integer, bitLength: integer)
            - Add up to 31 bits to the resulting data

        Encoder:buildString() -> string
            - Returns a string with the encoded data

        Base64.Decoder.create(data: string) -> Decoder
            - Creates a decoder instance that will read through the string

        Decoder:readBitString(bitLength: integer) -> integer
            - Reads up to 31 bits from the provided data

        Decoder.bit_len, Encoder.bit_len: integer
            - Can be used to store and verify the bit length elsewhere

        Base64.Internal
            CHARMAP - the default charmap (the standard Base64 alphabet from RFC 4648)
            REVERSE_CHARMAP - the inverse of the default charmap

            GenerateCharmap(charset: string, voidInt, setSize, i2ch, ch2i) -> Charmap, ReverseCharmap
                - Generates a charmap and its reverse for the given charset
                - Can be used to obfuscate the encoding, if you use it like so:
                    Base64.Internal.CHARMAP, Base64.Internal.REVERSE_CHARMAP = Base64.Internal.GenerateCharmap(
                        "yJ3uFjRLC0h5NTSMYHm27WOGUI/Q+9rKiDPdagtsABpxlwoce48nkzXqvE1ZbfV6"
                    )
                - To replicate the original encoding, use `Base64.Internal.GenerateCharmap('ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/', 0)`

    Optional requirements:
        Total Initialization by Bribe                   @ https://www.hiveworkshop.com/threads/317099/
        DebugUtils by Eikonium                          @ https://www.hiveworkshop.com/threads/330758/

    Inspired by:
        - Base64 from WurstStdlib2                      @ https://github.com/wurstscript/WurstStdlib2/blob/master/wurst/file/Base64.wurst
        - Aniki's Base64 & BitBuf                       @ https://www.hiveworkshop.com/threads/288199/

    Updated: 12 May 2023
--]]
OnInit("Base64", function()

    --[ INTERNAL ]--

    --- Forward dictionary: index in [1, 64] -> one-character string.
    --- Entry `v + 1` is the character that encodes the 6-bit value `v`.
    local CHARMAP = {}
    for symbol in ("ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"):gmatch(".") do
        CHARMAP[#CHARMAP + 1] = symbol
    end

    --- Reverse dictionary: byte code in [1, 255] -> 6-bit value in [0, 63].
    --- Every byte that is not part of the alphabet maps to 0.
    local REVERSE_CHARMAP = {}
    for code = 1, 255 do
        REVERSE_CHARMAP[code] = 0
    end
    do
        local alphabet = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"
        for value = 0, 63 do
            REVERSE_CHARMAP[alphabet:byte(value + 1)] = value
        end
    end

    --- Builds a forward and a reverse dictionary for `charset`.
    --- The forward table maps position (1-based) to character; the reverse
    --- table maps a character's byte code to its 0-based position.
    ---@param charset string characters of the alphabet, in value order
    ---@param voidInt? integer value stored for every byte code first (default 0)
    ---@param setSize? integer number of reverse slots to pre-fill, 1..setSize (default 255)
    ---@param i2ch? table forward table to fill in place (default: a new table)
    ---@param ch2i? table reverse table to fill in place (default: a new table)
    ---@return table i2ch, table ch2i
    local function GenerateCharmap(charset, voidInt, setSize, i2ch, ch2i)
        local forward = i2ch or {}
        local reverse = ch2i or {}
        local filler = voidInt or 0

        -- Pre-fill first so the alphabet entries below overwrite the filler.
        for code = 1, setSize or 255 do
            reverse[code] = filler
        end

        for position = 1, #charset do
            forward[position] = charset:sub(position, position)
            reverse[charset:byte(position, position)] = position - 1
        end

        return forward, reverse
    end

    --- Dictionaries and generator exposed as `Base64.Internal`.
    --- The encoder and decoder look the dictionaries up through this table on
    --- every call, so replacing its fields changes the encoding in use.
    local Internal = {}
    Internal.CHARMAP = CHARMAP
    Internal.REVERSE_CHARMAP = REVERSE_CHARMAP
    Internal.GenerateCharmap = GenerateCharmap

    --[ ENCODER CLASS ]--

    --- Packs a sequence of unsigned bit strings into an encoded string.
    ---@class Encoder
    ---@field buffer integer bits accepted but not yet emitted as a character
    ---@field buffer_len integer number of valid bits held in `buffer`
    ---@field bit_len integer total number of bits written so far
    ---@field out table emitted characters, one per slot
    ---@field out_len integer number of characters stored in `out`
    local Encoder = {}
    Encoder.__index = Encoder

    --- Creates an empty encoder.
    ---@return Encoder
    function Encoder.create()
        local instance = {
            out = {},
            out_len = 0,
            buffer = 0,
            buffer_len = 0,
            bit_len = 0,
        }
        return setmetatable(instance, Encoder)
    end

    --- Appends one character for the low 6 bits of `octet` to the encoder output.
    --- (Despite the name, the unit is 6 bits: the value is masked with 0x3f.)
    ---@param e Encoder
    ---@param octet integer only bits 0..5 are used
    local function writeOctet(e, octet)
        local slot = e.out_len + 1
        e.out_len = slot
        e.out[slot] = Internal.CHARMAP[(octet & 0x3f) + 1]
    end

    --- Appends `len` bits of `bstr` to the stream, least-significant bits first.
    --- Every complete group of 6 bits is emitted as a character right away; the
    --- remainder (fewer than 6 bits) stays in `self.buffer` for the next call.
    --- `bstr` is not masked, so it must already fit in `len` bits.
    ---@param bstr integer should be in the range [0, 2^len - 1]
    ---@param len integer should be in the range [0, 31]
    function Encoder:writeBitString(bstr, len)
        -- Unrolled on purpose: each guard handles "no further full group".
        self.bit_len = self.bit_len + len

        local pending = self.buffer_len
        local merged = self.buffer | (bstr << pending)
        local total = pending + len

        if total < 6 then
            self.buffer, self.buffer_len = merged, total
            return
        end

        writeOctet(self, merged)
        if total < 12 then
            self.buffer, self.buffer_len = merged >> 6, total - 6
            return
        end

        writeOctet(self, merged >> 6)
        if total < 18 then
            self.buffer, self.buffer_len = merged >> 12, total - 12
            return
        end

        writeOctet(self, merged >> 12)
        if total < 24 then
            self.buffer, self.buffer_len = merged >> 18, total - 18
            return
        end

        writeOctet(self, merged >> 18)
        if total < 30 then
            self.buffer, self.buffer_len = merged >> 24, total - 24
            return
        end

        writeOctet(self, merged >> 24)

        -- From here on the remaining bits are taken from `bstr` itself rather
        -- than from `merged`, which may span up to 36 bits (5 pending + 31 new).
        -- NOTE(review): presumably to avoid relying on integer bits above bit 31.
        if total == 36 then
            -- Only reachable with 5 pending bits and len == 31: the last six
            -- bits of `bstr` (bits 25..30) form one more complete group.
            writeOctet(self, bstr >> 25)
            self.buffer, self.buffer_len = 0, 0
            return
        end

        self.buffer, self.buffer_len = bstr >> (30 - pending), total - 30
    end

    --- Returns the encoded string for everything written so far.
    --- Pending bits (fewer than 6) are emitted as one final character unless
    --- they are all zero; the decoder reads zeros past the end of its input,
    --- so a trailing all-zero group does not need to be stored.
    ---
    --- The encoder is left untouched: the final character is added to the
    --- returned string only, never to `self.out`. Calling this repeatedly
    --- therefore returns the same string, and writing more data afterwards
    --- continues the stream correctly.
    ---@return string
    function Encoder:buildString()
        local body = table.concat(self.out, "", 1, self.out_len)
        if self.buffer > 0 then
            return body .. Internal.CHARMAP[(self.buffer & 0x3f) + 1]
        end
        return body
    end

    --[ DECODER CLASS ]--

    --- Unpacks a sequence of unsigned bit strings from an encoded string.
    ---@class Decoder
    ---@field buffer integer bits already taken from the source but not yet returned
    ---@field buffer_len integer number of valid bits held in `buffer`
    ---@field bit_len integer total number of bits requested so far
    ---@field pointer integer number of source characters consumed so far
    ---@field source string the encoded input
    local Decoder = {}
    Decoder.__index = Decoder

    --- Creates a decoder positioned at the start of `str`.
    ---@param str string encoded data
    ---@return Decoder
    function Decoder.create(str)
        local instance = {
            source = str,
            pointer = 0,
            buffer = 0,
            buffer_len = 0,
            bit_len = 0,
        }
        return setmetatable(instance, Decoder)
    end

    --- Returns the 6-bit value of the next source character and advances the
    --- read position by one. (Despite the name, the unit is 6 bits.)
    --- Once the source is exhausted it returns 0 and does not advance.
    --- A byte with no entry in the reverse dictionary (byte 0 with the default
    --- table, which starts at index 1, or a byte above a custom `setSize`)
    --- also yields 0 instead of nil, so the caller's bitwise operations cannot
    --- fail on malformed input.
    ---@param e Decoder
    ---@return integer
    local function readOctet(e)
        local position = e.pointer + 1
        if position > #e.source then
            return 0
        end
        e.pointer = position
        return Internal.REVERSE_CHARMAP[string.byte(e.source, position)] or 0
    end

    --- Reads the next `len` bits from the stream and returns them as an
    --- unsigned integer; the mirror image of `Encoder:writeBitString`.
    --- Bits left over from the last character read are kept in `self.buffer`
    --- (fewer than 6 of them) and are consumed first by the next call.
    --- Reading past the end of the source yields zero bits.
    ---@param len integer should be in the range [0, 31]
    ---@return integer
    function Decoder:readBitString(len)
        -- Unrolled on purpose: one nesting level per additional character.
        self.bit_len = self.bit_len + len

        local carried, carried_len = self.buffer, self.buffer_len
        local fresh = 0                     -- bits taken from the source by this call
        local to_read = len - carried_len   -- bits still needed beyond the carried ones

        if to_read > 0 then
            fresh = fresh | (readOctet(self))
            if to_read > 6 then
                fresh = fresh | (readOctet(self) << 6)
                if to_read > 12 then
                    fresh = fresh | (readOctet(self) << 12)
                    if to_read > 18 then
                        fresh = fresh | (readOctet(self) << 18)
                        if to_read > 24 then
                            fresh = fresh | (readOctet(self) << 24)
                            if to_read == 31 then -- only happens when buffer_len == 0
                                -- A sixth character is needed: its lowest bit
                                -- completes the value, its other 5 bits carry over.
                                local last = readOctet(self)
                                self.buffer, self.buffer_len = last >> 1, 5
                                return fresh | ((last & 1) << 30)
                            end
                        end
                    end
                end
            end
        end

        -- The merged value can span up to 35 bits (5 carried + 30 fresh), but it
        -- is only used under the `len`-bit mask, so losing its top bits on a
        -- narrower integer type cannot change the result.
        local value = (carried | (fresh << carried_len)) & ((1 << len) - 1)

        -- Take the leftover from the part that still holds it, not from the
        -- merged value: `fresh` never exceeds 30 bits, so nothing is shifted
        -- out of range. With wide integers this equals `merged >> len`.
        local leftover
        if to_read > 0 then
            leftover = fresh >> to_read
        else
            leftover = carried >> len
        end

        -- Leftover count = bits in the characters just read minus bits consumed
        -- from them (also correct for to_read <= 0, where nothing was read).
        self.buffer, self.buffer_len = leftover, ((to_read - 1) // 6 + 1) * 6 - to_read
        return value
    end

    -- Public entry point, published as the global `Base64`.
    Base64 = {}
    Base64.Encoder = Encoder
    Base64.Decoder = Decoder
    Base64.Internal = Internal
end)
-- Optional DebugUtils hook: this only runs when a global `Debug` exists.
-- NOTE(review): presumably closes the file section opened by `Debug.beginFile`
-- at the top of this script -- confirm against DebugUtils.
if Debug then Debug.endFile() end

Values must be read back in the same order, and with the same bit lengths, as they were written.
Lua:
--- Minimal round trip: writes four values, prints the encoded string, then
--- reads the values back and prints them in hexadecimal.
--- The reads use the same bit lengths in the same order as the writes
--- (28, 4, 28, 28); any other sequence would slice the stream at the wrong
--- positions and print unrelated numbers.
local function test()
    local e = Base64.Encoder.create()
    e:writeBitString(0x1234567, 28)
    e:writeBitString(0xf, 4)
    e:writeBitString(0x1234567, 28)
    e:writeBitString(0x1234567, 28)
    local s = e:buildString()
    print(s)

    local d = Base64.Decoder.create(s)
    print(("0x%x"):format(d:readBitString(28)))
    print(("0x%x"):format(d:readBitString(4)))
    print(("0x%x"):format(d:readBitString(28)))
    print(("0x%x"):format(d:readBitString(28)))
end

--- Randomised round trip using a custom (obfuscating) charset.
--- Generates `dataset_length` random values with random bit lengths in
--- [1, 31], encodes them, decodes them again and prints a line for every
--- mismatch. The generated data, the encoded string with its bit length and
--- the decoded data are printed as well.
--- Note: this replaces `Base64.Internal.CHARMAP` / `REVERSE_CHARMAP` and does
--- not restore them afterwards.
---@param dataset_length integer number of random values to test
local function test_complex(dataset_length)
    local dataset = {}
    local dataset_display = {}
    for _ = 1, dataset_length do
        local len = math.random(31)
        local value = math.random(1 << len) - 1
        dataset[#dataset + 1] = {
            len = len,
            value = value
        }
        dataset_display[#dataset_display + 1] = ("%d:0x%x"):format(len, value)
    end

    print(table.concat(dataset_display, ', '))

    -- GenerateCharmap is local to the library, so it has to be reached through
    -- the exported Internal table; a bare `GenerateCharmap` is nil out here.
    Base64.Internal.CHARMAP, Base64.Internal.REVERSE_CHARMAP = Base64.Internal.GenerateCharmap(
        "yJ3uFjRLC0h5NTSMYHm27WOGUI/Q+9rKiDPdagtsABpxlwoce48nkzXqvE1ZbfV6", 0
    )

    local e = Base64.Encoder.create()
    for _, data in ipairs(dataset) do
        e:writeBitString(data.value, data.len)
    end
    local s = e:buildString()
    print(e.bit_len, s)

    dataset_display = {}

    local d = Base64.Decoder.create(s)
    for i, data in ipairs(dataset) do
        local value = d:readBitString(data.len)
        if value ~= data.value then
            print(('Error at pos %d, len %d expected 0x%x got 0x%x'):format(i, data.len, data.value, value))
        end
        dataset_display[#dataset_display + 1] = ("%d:0x%x"):format(data.len, value)
    end

    print(table.concat(dataset_display, ', '))
end
Side note: if you paste it to World Editor, escape all % symbols with another %, or it'll crash unapologetically.

Optional requirements:

Every suggestion is welcome!
 
Last edited:
Top