- Joined
- Feb 8, 2025
- Messages
- 239
Poker Strike - Most advanced poker like custom game
WC3 Tracker - Live Warcraft 3 Lobby Stats
This is looking good! cheers for this. Ill need to translate it into jass but thats fine.
false is a valid return value.if Debug then Debug.beginFile("Serializer") end
do
--[[
Serializer v1.0.0 by Tomotz
Based on SaveLoadHelper by Wrda https://www.hiveworkshop.com/threads/lua-save-and-load-1-6.355977/
Allows packing arbitrary variables into strings (and unpacking those strings back into variables).
Supports booleans, numbers (integers and floats), strings and tables, including recursive tables.
Optionally supports units and items (given a mapping from unit/item to unique integer ids and back).
Note: float numbers might lose a small amount of precision (0.0000001 difference)
Supports any data stated above including all unprintable characters
API:
---@param input boolean | number | string | table
---@return string? -- returns the full packed value of the input or nil on input error
--- Dumps a variable into a string
function Serializer.dumpVariable(input)
---Loads data packed with dumpVariable into a variable.
---@param data string -- the packed variable
---@return any -- value of the loaded variable. Returns nil on fail. Note that `false` is a valid return value.
---@return integer? -- number of characters consumed
function Serializer.loadVariable(data)
---@param p player
---@param input boolean | number | string | table - the input variable to be saved
---@param filePath string
---@param scrambleCallback fun(string):string? | nil - optional callback to be executed on the data (including the checksum) before saving.
--- saves a variable to a file in a Serialized format. Adds a checksum to validate data integrity
function Serializer.saveFile(p, input, filePath, scrambleCallback)
---Loads serialized data from a file or list of files into a table. Syncing the data to all players. Once completed, fires the callback function.
---@param whichPlayer player
---@param filePaths string | string[]-- name of the file/files to load
---@param callback fun(loadedVariables: any) -- callback to execute once loading is done. The callback is getting the synced data that was saved in the file(s). If a single file was given, the data is the variable loaded from that file. If a list of files was given, the data is a list of variables loaded from each file in the same order.
--- where loadedTables is the list of tables loaded from the files or nil on error and whichPlayer is the player that requested the load
---@param unscrambleCallback fun(string):string? | nil - optional callback to be executed on each file data after loading. Must match the scrambleCallback given to saveFile.
---@param isDeflate boolean? - if true, the data will be compressed using zlib before being synced between players (use if you want to reduce network traffic on large data).
---@return string -- an error message. Note that this message can differ between players, and must not be used in a synced way!
-- Message will be an empty string on success, or an error message on failure.
function Serializer.loadFile(whichPlayer, filePaths, callback, unscrambleCallback, isDeflate)
Recommendation - for very large files call saveFile/loadFile with LibDeflate.CompressDeflate/LibDeflate.DecompressDeflate in the scramble/unscramble callbacks, and set isDeflate to true.
Optional requirements
DebugUtils by Eikonium @ https://www.hiveworkshop.com/threads/330758/
FileIO (my version) - needed if you want to use the save/load to file functions.
SyncStream (my version) - needed if you want to use the save/load to file functions @ https://www.hiveworkshop.com/threads/optimized-syncstream-and-stringescape.367925/
My versions of SyncStream and FileIO are needed to be able to save/load all characters correctly (which the original version can't do)
LibDeflate by Magi - needed if you want to sync large amounts of data faster @ https://www.hiveworkshop.com/threads/magi-log-n-load-the-ultimate-save-load-system.357602/
LogUtils by me @ https://www.hiveworkshop.com/threads/logutils.357625/
If you want to serialize units/items, you need to provide two mappings:
UnitToUniqueId : table<unit, integer>, UniqueIdToUnit : table<integer, unit>
ItemToUniqueId : table<item, integer>, UniqueIdToItem : table<integer, item>
Updated Nov 2025
Doesn't support arbitrary classes with overriden metamethods. Doesn't support groups and hashtables
]]
Serializer = {}
--[[----------------------------------------------------------------------------------------------------
CONFIGURATION ]]
Serializer.VERSION = "000"
Serializer.TEXT_PREFIX = Serializer.VERSION .. " Serializer "
local IS_DEBUG = false -- enable debug prints
--------------------------------------------------------------------------------------------------------
--- Only print error messages unless we're in debug mode
---@param isError boolean
---@param ... any
local function debugPrint(isError, ...)
if isError or IS_DEBUG then
if LogWriteNoFlush == nil then
print(...)
else
LogWriteNoFlush(...)
end
end
end
---@param tbl table
---@return table? -- the class of the input tbl
function GetClass(tbl)
local mt = getmetatable(tbl)
return mt and mt.class or nil
end
---@param num integer
---@param char_count integer
---@return string
local function packBytes(num, char_count)
char_count = char_count or math.ceil(math.log(num + 1, 256)) -- Ensure char_count is sufficient to hold the number
local bytes = {} ---@type integer[]
local orig_num = num
-- Extract bytes from the number
for _ = 1, char_count do
local mod = num & 0xFF
table.insert(bytes, mod)
num = num >> 8
end
if num ~= 0 then
debugPrint(true, "number too large for char count.", orig_num, char_count)
end
-- Create the format string, one byte per "B" format specifier
local fmt = string.rep("B", char_count)
-- Pack the bytes into a string
return string.pack(fmt, table.unpack(bytes))
end
---@param bytes string
---@return integer
local function unpackBytes(bytes)
local fmt = string.rep("B", #bytes)
local values = table.pack(string.unpack(fmt, bytes))
local num = 0
-- Combine the bytes into a single number
-- minus one because string.unpack returns and extra value
for i = #values - 1, 1, -1 do
num = num * 256 + values[i]
end
return num
end
--compress
---@param float number
---@return integer
local function binaryFloat2Integer(float)
local result = string.unpack("i4", string.pack("f", float))
return result
end
---@param int integer
---@return number
local function binaryInteger2Float(int)
local result = string.unpack("f", string.pack("i4", int))
return result
end
--validating parts of the file
---@param str string
---@return integer
local function getChecksum(str)
local checksum = 0
for i = 1, #str do
checksum = checksum + string.byte(str, i, i)
end
return checksum
end
---@param str string
---@return string
local function addChecksum(str)
local checksum = getChecksum(str)
return packBytes(checksum, 4) .. str
end
---@param str string
---@return string, boolean
local function separateAndValidateChecksum(str)
local separatedString = str:sub(5)
return separatedString, getChecksum(separatedString) == unpackBytes(str:sub(1, 4))
end
---@class tokenProperties
---@field type string -- the token class - one of const, int, uint, flt, chr, str
---@field num_characters integer? -- the number of characters needed to represent the value
---@field value any -- the value of the token
---@field subType string? -- the subtype of the token, used only for tables and numbers
---@type table<string, tokenProperties>
local delimiterList = {
--- constant types:
["A"] = {type="const", value=true},
["B"] = {type="const", value=false},
["D"] = {type="const", subType="int", num_characters=0, value=0},
["E"] = {type="const", subType="int", num_characters=0, value=1},
--- userdata types
["P"] = {type="unit", num_characters=4},
["Q"] = {type="item", num_characters=4},
--- string types. Format is TLV (type length value).
--- num_characters is the length of the length field for the string:
-- Max length of 255 characters.
["0"] = {type="str", num_characters=1},
-- Didn't check the exact string length you can get, but 256*175=44800 characters seems to work
-- and 256*200=51200 characters doesn't work
["1"] = {type="str", num_characters=2},
--- character types. Basically constant length strings
-- a single ascii character
["2"] = {type="chr", num_characters=1},
-- 4 ascii characters
["3"] = {type="chr", num_characters=4},
--- unsigned integer types:
--- values in range [0, 255]
["5"] = {type="num", subType="int", num_characters=1},
-- values in range [0, 256^2 - 1]
["6"] = {type="num", subType="int", num_characters=2},
--- signed integer types:
-- 32bit integer. values in range [0x80000000, 0x7FFFFFFF] (So any integer value supported in lua32 bit)
["8"] = {type="num", subType="int", num_characters=4},
--- float types:
-- Single percision float, represented by 4 characters like a 32bit int
["9"] = {type="num", subType="flt", num_characters=4},
-- Table types. Tables are handled recursively. They can be 1 or 2 dimentional.
-- They can be generic and contain any type, or only one specific type.
-- The table starts with a type delimiter (as all other types), then the length as a 2 byte field.
-- after this, comes an optional node type delimiter (only for tables with specific type nodes)
-- and then the table contents.
-- any[] - array of any type. Format: <delimiter> <2byte_item_count> <type1> <item1> <type2> <item2> ...
["b"] = {type="tbl", subType="var_arr"},
-- type[] - array with specific type. Format: <delimiter> <2byte_item_count> <all_value_type> <item1> <item2> ...
["c"] = {type="tbl", subType="type_arr"},
-- 2 dimentional tables are saved in a similar way to two 1 dimentional tables
-- The format is
-- <delimiter> <2byte_item_count> <key_table_delimiter> <key_table> <value_table_delimiter> <value_table>
-- the length field is onlt saved once.
-- table[any, any] - table with generic keys and generic values
["d"] = {type="tbl", subType="var_tbl"},
-- table[type, any] - table with single type keys and generic values
["e"] = {type="tbl", subType="var_val_tbl"},
-- table[any, type] - table with generic keys and single type values
["f"] = {type="tbl", subType="var_key_tbl"},
-- table[type, type] - table with single type keys and single type values
["g"] = {type="tbl", subType="typed_tbl"},
}
---@type table<string, string>
--- mapping from variable type to delimiter in the table above
local delimiterMapping = {
["True"] = "A",
["False"] = "B",
["zero"] = "D",
["one"] = "E",
["unit"] = "P",
["item"] = "Q",
["str1"] = "0",
["str2"] = "1",
["chr1"] = "2",
["chr4"] = "3",
["int1"] = "5",
["int2"] = "6",
["int4"] = "8",
["flt4"] = "9",
["var_arr"] = "b",
["type_arr"] = "c",
["var_tbl"] = "d",
["var_val_tbl"] = "e",
["var_key_tbl"] = "f",
["typed_tbl"] = "g",
}
---@param input boolean | number | string | table | unit
---@param buffer string[] -- in-out buffer to dump the packed data into
---@param skipDelimiter boolean? -- if true, will not write the delimiter byte to the buffer
---@return string? -- returns the delimiter type or nil on input error
function packVariable(input, buffer, skipDelimiter)
local delimiterType ---@type string
local tokenValue = nil ---@type string?
if not skipDelimiter then
table.insert(buffer, "") -- placeholder for the delimiter
end
local delimiterOffset = #buffer
debugPrint(false, "saving ", type(input), ": ", tostring(input):sub(1, 100))
if type(input) == "boolean" then
delimiterType = input and delimiterMapping.True or delimiterMapping.False
elseif type(input) == "userdata"then
if string.sub(tostring(input), 1, 4) == "unit" then
delimiterType = delimiterMapping.unit
if UnitToUniqueId[input] ~= nil then
debugPrint(false, "packing unit variable. uid:", GetUnitTypeId(input), ", uniqueId:", UnitToUniqueId[input])
tokenValue = packBytes(UnitToUniqueId[input], 4)
else
tokenValue = packBytes(0, 4)
end
elseif string.sub(tostring(input), 1, 4) == "item" then
delimiterType = delimiterMapping.item
if ItemToUniqueId[input] ~= nil then
tokenValue = packBytes(ItemToUniqueId[input], 4)
else
tokenValue = packBytes(0, 4)
end
else
debugPrint(true, "unsupported userdata type: ", input)
if not skipDelimiter then
table.remove(buffer, delimiterOffset) -- packing failed. Cleanup
end
return nil
end
elseif type(input) == "number" then
if math.floor(input) == input then
-- int nubmer
if input == 0 then
delimiterType = delimiterMapping.zero
elseif input == 1 then
delimiterType = delimiterMapping.one
elseif input < 0 or input >= 256 ^ 2 then
delimiterType = delimiterMapping.int4
tokenValue = packBytes(input, 4)
elseif input <= 255 then
delimiterType = delimiterMapping.int1
tokenValue = packBytes(input, 1)
else
if input >= 256^2 then
debugPrint(true, "unexpected number value (too large):", input)
end
delimiterType = delimiterMapping.int2
tokenValue = packBytes(input, 2)
end
else
delimiterType = delimiterMapping.flt4
tokenValue = packBytes(binaryFloat2Integer(input), 4)
end
elseif type(input) == "string" then
if #input == 1 then
delimiterType = delimiterMapping.chr1
elseif #input == 4 then
delimiterType = delimiterMapping.chr4
elseif #input <= 255 then
delimiterType = delimiterMapping.str1
-- pack length field
table.insert(buffer, packBytes(#input, 1))
else
if #input >= 256 ^ 2 then
debugPrint(true, "string too long for str2 length:", #input)
end
delimiterType = delimiterMapping.str2
-- pack length field
table.insert(buffer, packBytes(#input, 2))
end
tokenValue = input
elseif type(input) == "table" then
delimiterType = dumpTable(input, buffer)
if delimiterType == nil then
if not skipDelimiter then
table.remove(buffer, delimiterOffset) -- packing failed. Cleanup
end
return nil
end
else
if input ~= nil and tostring(input) ~= "nil" then
debugPrint(true, "unsupported type for saving: ", type(input), ". data: ", input, ". returning zero")
end
delimiterType = delimiterMapping.zero
end
if not skipDelimiter then
buffer[delimiterOffset] = delimiterType
end
if type(tokenValue) == "string" then
table.insert(buffer, tokenValue)
end
return delimiterType
end
---@param tbl table
---@return boolean
function isArray(tbl)
local nn = #tbl -- Get the "length" of the table
for key, _ in pairs(tbl) do
if type(key) ~= "number" or key < 1 or key > nn or math.floor(key) ~= key then
return false -- Found a non-integer or out-of-range key
end
end
return true
end
--- dumps an array into a string. Note that this function only returns the delimiter and element count, and does not pack them
---@param input table
---@param buffer string[] -- in-out buffer to dump the packed data into
---@return string? -- returns the delimiter type or nil on input error
---@return integer? -- returns the number of elements in the array
function dumpArr(input, buffer)
---@class CacheEntry
---@field delimiter string
---@field packedData string[]
---@type CacheEntry[]
local childrenCache = {} -- cache to save the results of the children
-- first run the dump recursively, and check if all the children are of the same type as the first child
-- we will also cache the results of the children so we don't have to dump them again
local childBuffer = {} ---@type string[]
local childType = packVariable(input[1], childBuffer, true)
if childType == nil then
return nil
end
table.insert(childrenCache, {delimiter=childType, packedData=childBuffer})
---@type string?
local firstType = childType
for ii = 2, #input do
childBuffer = {}
local vv = input[ii]
childType = packVariable(vv, childBuffer, true)
if childType == nil then
return nil
end
if childType ~= firstType then
if firstType and delimiterList[childType].subType == "int" and delimiterList[firstType].subType == "int" then
-- Allow saving the int as a longer type
-- In some cases this might not be the most space efficient way, but I don't want to invest in optimizing it
if delimiterList[childType].num_characters == 0 and delimiterList[firstType].num_characters == 0 then
-- two different constant ints. Need at least 1 byte to save the type
firstType = delimiterMapping.int1
else
-- take the bigger of the two delimiters
firstType = delimiterList[childType].num_characters > delimiterList[firstType].num_characters and childType or firstType
end
else
firstType = nil
end
end
table.insert(childrenCache, {delimiter=childType, packedData=childBuffer})
end
local delimiterType = ""
-- now we pack the children
if firstType == nil then
-- there are different types of children. We will pack them as var_arr
delimiterType = delimiterMapping.var_arr
for _, child in ipairs(childrenCache) do
table.insert(buffer, child.delimiter)
for _, node in ipairs(child.packedData) do
table.insert(buffer, node)
end
end
else
-- all children are of the same type. We will pack them as type_arr
delimiterType = delimiterMapping.type_arr
table.insert(buffer, firstType)
for _, child in ipairs(childrenCache) do
-- we need to pad the value with zeros and handle const values correctly
local firstDelimiter = delimiterList[firstType]
if firstDelimiter.subType == "int" then
local curDelimiter = delimiterList[child.delimiter]
if firstDelimiter.type == "const" then
-- all of the same const type. No need to do anything, there is no data
elseif curDelimiter.type == "const" then
table.insert(buffer, packBytes(curDelimiter.value, 1) .. string.rep(string.char(0), firstDelimiter.num_characters - 1))
else
local curValue = child.packedData[1] -- since this is an int and we packed it without the delimiter, there should be only one entry
-- all of the same int type. We need to pad the value with zeros
table.insert(buffer, curValue .. string.rep(string.char(0), firstDelimiter.num_characters - #curValue))
end
else
for _, node in ipairs(child.packedData) do
table.insert(buffer, node)
end
end
end
end
return delimiterType, #input
end
---@param input table
---@param buffer string[] -- in-out buffer to dump the packed data into
---@return string?, integer?
function dump2DTable(input, buffer)
-- first we split the table into keys and values
local key_table = {}
local value_table = {}
for kk, vv in pairs(input) do
table.insert(key_table, kk)
table.insert(value_table, vv)
end
table.insert(buffer, "") -- placeholder for the length field
local lengthOffset = #buffer
local keyDelimiterType, nodeCount = dumpArr(key_table, buffer)
if keyDelimiterType == nil then
return nil
end
local valueDelimiterType, _ = dumpArr(value_table, buffer)
if valueDelimiterType == nil then
return nil
end
buffer[lengthOffset] = packBytes(nodeCount, 2)
if keyDelimiterType == delimiterMapping.var_arr then
if valueDelimiterType == delimiterMapping.var_arr then
-- both keys and values are of different types. We will pack them as var_tbl
return delimiterMapping.var_tbl, nodeCount
end
-- keys are of different types, values are of the same type. We will pack them as var_key_tbl
return delimiterMapping.var_key_tbl, nodeCount
end
if valueDelimiterType == delimiterMapping.var_arr then
-- keys are of the same type, values are of different types. We will pack them as var_val_tbl
return delimiterMapping.var_val_tbl, nodeCount
end
-- both keys and values are of the same type. We will pack them as typed_tbl
return delimiterMapping.typed_tbl, nodeCount
end
---@param class any
---@return boolean
local function isClassOverridingMetatables(class)
local meta = getmetatable(class)
if meta == nil then
return false
end
for key, _ in pairs(meta) do
if type(key) == "string" and key:sub(1,2) == "__" and key ~= "__call" then
-- we don't care about call because it should not be used at this point as the class was already created
return true
end
end
return false
end
---@param input table
---@param buffer string[] -- in-out buffer to dump the packed data into
---@return string? -- returns the delimiter type or nil on error
function dumpTable(input, buffer)
if next(input) == nil and GetClass(input) == nil then
-- input is empty
-- delimiterMapping.var_arr is the shortest table type in string represent
table.insert(buffer, packBytes(0, 2))
return delimiterMapping.var_arr
end
local class = GetClass(input)
if class ~= nil and isClassOverridingMetatables(class) then
debugPrint(true, "warning, saving a table with class:", class, "when loading it, it will be treated as a normal table, and changes to it's metamethod would not apply")
end
if isArray(input) then
table.insert(buffer, "") -- placeholder for the length field
local lengthOffset = #buffer
local delimiterType, itemCount = dumpArr(input, buffer)
if delimiterType == nil then
return nil
end
buffer[lengthOffset] = packBytes(itemCount, 2)
return delimiterType
end
local delimiterType, itemCount = dump2DTable(input, buffer)
return delimiterType
end
---@param input boolean | number | string | table
---@return string? -- returns the full packed value of the input or nil on input error
--- Dumps a variable into a string
function Serializer.dumpVariable(input)
local buffer = {}
local delimiterType = packVariable(input, buffer)
if delimiterType == nil then
return nil
end
return table.concat(buffer)
end
---@param p player
---@param input boolean | number | string | table - the input variable to be saved
---@param filePath string
---@param scrambleCallback fun(string):string? | nil - optional callback to be executed on the data (including the checksum) before saving.
--- saves a variable to a file in a Serialized format. Adds a checksum to validate data integrity
function Serializer.saveFile(p, input, filePath, scrambleCallback)
local data = Serializer.dumpVariable(input)
if data == nil then
debugPrint(true, "Failed to save data")
return false
end
debugPrint(false, "after dump variable:", data:sub(1, 10), "end:", data:sub(-10))
data = addChecksum(data)
debugPrint(false, "after add checksum:", data:sub(1, 10), "end:", data:sub(-10))
if scrambleCallback ~= nil then data = scrambleCallback(data) end
if data == nil then
debugPrint(true, "Failed to scramble data")
return false
end
debugPrint(false, "after scramble:", data:sub(1, 10), "end:", data:sub(-10))
if GetLocalPlayer() == p then
FileIO.Save(filePath, Serializer.TEXT_PREFIX .. data .. "\n")
end
return true
end
---@param data string
---@param in_ptr integer
---@param numBytes integer
---@return integer?, integer?
function consumeBytes(data, in_ptr, numBytes)
if in_ptr + numBytes - 1 > #data then
debugPrint(true, "invalid data length! ", in_ptr, " ", numBytes, " ", #data)
return nil
end
local out = unpackBytes(data:sub(in_ptr, in_ptr + numBytes - 1))
debugPrint(false, "consumed ", numBytes, " bytes. ", string.format("0x\x25X", out))
in_ptr = in_ptr + numBytes
return out, in_ptr
end
---@param data string -- the packed array
---@param count integer -- the number of elements in the array
---@param isSingleType boolean -- true if all array nodes are of the same type
---@return table? -- the loaded table or nil on error
---@return integer? -- number of characters consumed
function loadArr(data, count, isSingleType)
local in_ptr = 1
local allValType = nil
if isSingleType then
allValType, in_ptr = consumeBytes(data, in_ptr, 1)
if allValType == nil then
return nil
end
end
local outTable = {}
while count > 0 do
local valueType = allValType
if valueType == nil then
valueType, in_ptr = consumeBytes(data, in_ptr, 1)
if valueType == nil then
return nil
end
end
local success, tokenVal, numChars = loadToken(string.char(valueType), data:sub(in_ptr))
if not success then
return nil
end
in_ptr = in_ptr + numChars
local value = tokenVal
outTable[#outTable + 1] = value
count = count - 1
end
return outTable, in_ptr - 1
end
---@param data string
---@param token tokenProperties
---@return table? -- the loaded table or nil on error
---@return integer? -- number of characters consumed
function loadTable(data, token)
local in_ptr = 1
local nodeCount
if #data < 2 then
debugPrint(true, "invalid data length! ", #data)
return nil
end
nodeCount, in_ptr = consumeBytes(data, in_ptr, 2)
if nodeCount == nil then
return nil
end
if token.subType == "var_arr" or token.subType == "type_arr" then
local isSingleType = token.subType == "type_arr"
local valTable, consumed = loadArr(data:sub(in_ptr), nodeCount, isSingleType)
if valTable == nil then
return nil
end
return valTable, in_ptr + consumed - 1
end
local isKeyTyped = token.subType == "var_val_tbl" or token.subType == "typed_tbl"
local keyTable, consumed = loadArr(data:sub(in_ptr), nodeCount, isKeyTyped)
if keyTable == nil then
return nil
end
in_ptr = in_ptr + consumed
local isValTyped = token.subType == "var_key_tbl" or token.subType == "typed_tbl"
local valTable, consumed0 = loadArr(data:sub(in_ptr), nodeCount, isValTyped)
if valTable == nil then
return nil
end
in_ptr = in_ptr + consumed0
local outTable = {}
for i = 1, nodeCount do
outTable[keyTable[i]] = valTable[i]
end
return outTable, in_ptr - 1
end
---Loads packed with packVariable data into a variable.
---@param delType string -- type of the token
---@param data string -- the packed token
---@return boolean -- isSuccess - returns false on fail
---@return any -- value of the loaded variable, or nil on input error
---@return integer? -- number of characters consumed
function loadToken(delType, data)
if delimiterList[delType] == nil then
debugPrint(true, "invalid delimiter! ", delType)
return false -- bad format
end
if delimiterList[delType].type == "const" then
return true, delimiterList[delType].value, 0
end
if delimiterList[delType].type == "tbl" then
local tbl, consumed = loadTable(data, delimiterList[delType])
if tbl == nil then
return false
end
return true, tbl, consumed
end
local in_ptr = 1
-- If we didn't return by now, the token is a number or string
local tokenLen = delimiterList[delType].num_characters
local tokenEnd = in_ptr + tokenLen - 1
if tokenEnd > #data then
debugPrint(true, "invalid token length!")
return false -- bad format
end
local tokenContent = data:sub(in_ptr, tokenEnd)
debugPrint(false, "consumed token. len=", tokenLen, ". first byte=: ", tostring(tokenContent):byte(1))
in_ptr = tokenEnd + 1
local value
if delimiterList[delType].type == "num" then
value = unpackBytes(tokenContent)
if delimiterList[delType].subType == "flt" then
-- float doesn't need sine as it's always represented by 4 byte with sign in it already
value = binaryInteger2Float(value)
end
elseif delimiterList[delType].type == "chr" then
value = tokenContent
elseif delimiterList[delType].type == "str" then
local strLen = unpackBytes(tokenContent)
local strEnd = tokenEnd + strLen
if strEnd > #data then
debugPrint(true, "invalid str length! ", strLen, " ", #data)
return false -- bad format
end
value = data:sub(tokenEnd + 1, strEnd)
in_ptr = strEnd + 1
elseif delimiterList[delType].type == "unit" then
value = UniqueIdToUnit[unpackBytes(tokenContent)]
debugPrint(false, "loading unit token. UniqueId:", unpackBytes(tokenContent), ", unit:", value)
if value == nil then value = 0 end
elseif delimiterList[delType].type == "item" then
value = UniqueIdToItem[unpackBytes(tokenContent)]
if value == nil then
value = 0
end
end
debugPrint(false, "finished loading token: ", tostring(value):sub(1, 100))
return true, value, in_ptr - 1
end
---Loads data packed with dumpVariable into a variable.
---@param data string -- the packed variable
---@return any -- value of the loaded variable. Returns nil on fail. Note that `false` is a valid return value.
---@return integer? -- number of characters consumed
function Serializer.loadVariable(data)
if #data < 1 then
debugPrint(true, "empty data!")
return nil
end
local delType = string.sub(data, 1, 1)
debugPrint(false, "consumed delimiter (1 byte): ", string.format("0x\x2502X", string.byte(delType)))
local success, val, numChars = loadToken(delType, data:sub(2))
if not success then
return nil
end
return val, numChars + 1
end
-- gets the packed and scrambled variable and returns the unpacked version of it
---@param packedData string -- the packed and scrambled variable
---@param isDeflate boolean? -- if true, the data will be compressed using zlib. Must match the compression used in Serializer.saveFile.
--- Note that this changes the whole behavior of the packed data, not just the deflate itself.
---@return table? -- value of the loaded variable. Returns nil on fail. Note that `false` is a valid return value.
local function handleSyncData(packedData, isDeflate)
local decompressed = packedData
if isDeflate then
decompressed = LibDeflate.DecompressDeflate(packedData)
if decompressed == nil then
debugPrint(true, "Failed to decompress data.")
print("loading error")
return nil
end
debugPrint(false, "deflate the second inflate:", decompressed:sub(1, 10), "end:", decompressed:sub(-10))
end
local loaded = Serializer.loadVariable(decompressed)
if loaded == nil then
debugPrint(true, "Failed to load variable from data.")
print("loading error")
return nil
end
if type(loaded) ~= "table" then
debugPrint(true, "expected loaded data to be a table.", type(loaded))
print("loading error")
return nil
end
return loaded
end
---Loads serialized data from a file or list of files into a table. Syncing the data to all players. Once completed, fires the callback function.
---@param whichPlayer player
---@param filePaths string | string[]-- name of the file/files to load
---@param callback fun(loadedVariables: any) -- callback to execute once loading is done. The callback is getting the synced data that was saved in the file(s). If a single file was given, the data is the variable loaded from that file. If a list of files was given, the data is a list of variables loaded from each file in the same order.
--- where loadedTables is the list of tables loaded from the files or nil on error and whichPlayer is the player that requested the load
---@param unscrambleCallback fun(string):string? | nil - optional callback to be executed on each file data after loading. Must match the scrambleCallback given to saveFile.
---@param isDeflate boolean? - if true, the data will be compressed using zlib before being synced between players (use if you want to reduce network traffic on large data).
---@return string -- an error message. Note that this message can differ between players, and must not be used in a synced way!
-- Message will be an empty string on success, or an error message on failure.
function Serializer.loadFile(whichPlayer, filePaths, callback, unscrambleCallback, isDeflate)
-- The data in this function is not synced yet, so we have to get to the sync call even if the data is wrong, so that all clients reach the same state
local errorMsg = ""
local inputType = type(filePaths)
if inputType == "string" then
filePaths = {filePaths}
end
local allData = {} ---@type string[]
for _, filePath in ipairs(filePaths) do
local isSuccess, scrambledData = false, nil
if (whichPlayer == GetLocalPlayer()) then
isSuccess, scrambledData = pcall(FileIO.Load, filePath)
end
if not isSuccess then
errorMsg = "Failed reading load file - Unknown file format."
scrambledData = ""
elseif scrambledData == nil then
errorMsg = "Could not load from local disk."
scrambledData = ""
elseif #scrambledData > #Serializer.VERSION and Serializer.VERSION ~= scrambledData:sub(1, #Serializer.VERSION) then
errorMsg = "Unsupported load file version! expected: " .. Serializer.VERSION .. " got: " .. scrambledData:sub(1, #Serializer.VERSION)
scrambledData = ""
elseif #scrambledData < #Serializer.TEXT_PREFIX + 2 then
errorMsg = "Save file corrupted or unrecognised."
scrambledData = ""
else
debugPrint(false, "After load file:", scrambledData:sub(1, 10), "end:", scrambledData:sub(-10))
scrambledData = scrambledData:sub(#Serializer.TEXT_PREFIX + 1, #scrambledData - 1) -- remove prefix and ending newline
debugPrint(false, "After prefix removal:", scrambledData:sub(1, 10), "end:", scrambledData:sub(-10))
end
if whichPlayer ~= GetLocalPlayer() and errorMsg ~= "" then
debugPrint(false, errorMsg)
end
local unscrambled = scrambledData
local loadedVar = ""
if errorMsg == "" then
if unscrambleCallback ~= nil then unscrambled = unscrambleCallback(scrambledData) end
if unscrambled == nil then
errorMsg = "Failed to unscramble data."
unscrambled = ""
else
debugPrint(false, "After unscramble:", unscrambled:sub(1, 10), "end:", unscrambled:sub(-10))
end
if type(unscrambled) ~= "string" then
errorMsg = "Failed to load from file - Unknown file format."
unscrambled = ""
debugPrint(true, "expected loaded data to be a list of strings.")
end
debugPrint(false, "after load variable:", unscrambled:sub(1, 10), "end:", unscrambled:sub(-10))
local rawData, isValid = separateAndValidateChecksum(unscrambled)
debugPrint(false, "after removing checksum:", rawData:sub(1, 10), "end:", rawData:sub(-10))
if isValid then
-- this should return the original variable saved by Serializer.saveFile
loadedVar = Serializer.loadVariable(rawData)
if loadedVar == nil then
debugPrint(true, "Failed to load variable from data1.")
loadedVar = ""
end
else
--tampering detected
if (whichPlayer == GetLocalPlayer()) then
-- data was not synced yet, so we can expect other players to fail here
debugPrint(true, "invalid checksum! bytes=", unpackBytes(rawData:sub(1, 4)), ". calc:", getChecksum(rawData:sub(5)), ". len(rawData) ==", #rawData)
-- for i=1,#rawData do
-- debugPrint(false, tostring(i), ": ", tostring(rawData:byte(i)))
-- end
end
errorMsg = "Tempered file detected - bad checksum."
end
end
table.insert(allData, loadedVar)
end
local packedData = Serializer.dumpVariable(allData)
if packedData == nil then
errorMsg = "Bad data in save file."
packedData = ""
end
debugPrint(false, "After dump all to one:", packedData:sub(1, 10), "end:", packedData:sub(-10))
if isDeflate then
packedData = LibDeflate.CompressDeflate(packedData) or ""
debugPrint(false, "After second inflate:", packedData:sub(1, 10), "end:", packedData:sub(-10))
end
debugPrint(false, "data to sync:", packedData:sub(1, 10), "end:", packedData:sub(-10))
SyncStream.sync(whichPlayer, packedData, function(syncedData)
debugPrint(false, "finished syncing", #syncedData, "bytes of data. syncedData:", syncedData:sub(1, 10), "end:", syncedData:sub(-10))
if syncedData == "" or syncedData == nil then
debugPrint(false, "no data synced!")
return
end
local loadedTables = handleSyncData(syncedData, isDeflate)
if inputType == "string" and loadedTables ~= nil and #loadedTables == 1 then
loadedTables = loadedTables[1]
end
if loadedTables ~= nil then callback(loadedTables) end
end)
return errorMsg
end
end
if Debug then Debug.endFile() end
-isLoadable that will make the file look nicer if you never plan to load it.if Debug then Debug.beginFile("FileIO") end
--[[
Optimized FileIO v1.0.0 by Tomotz
Based on Trokkin's version (https://www.hiveworkshop.com/threads/fileio-lua-optimized.347049/)
- Read and write files with any data.
- This version allows writing/reading any data including characters that the preload natives can't work with, by escaping them.
- Allow Saving with the `-isLoadable` that will make the file look nicer if you never plan to load it.
It's similar to Antares's Stable Lua FileIO, only it allows writing null terminators as well.
API:
FileIO.Save(filename, data, isLoadable?)
- Write string data to a file
---@param isLoadable boolean? -- Use false only if you never plan to load the file to make it format nicer and be more size efficient. Default is true.
FileIO.Load(filename) -> string?
- Read string data from a file. Returns nil if file doesn't exist.
FileIO.SaveAsserted(filename, data, onFail?) -> bool
- Saves the file and checks that it was saved successfully.
If it fails, passes (filename, data, loadResult) to onFail.
FileIO.enabled : bool
- field that indicates that files can be accessed correctly.
Requirements:
StringEscape by Tomotz @ https://www.hiveworkshop.com/threads/optimized-syncstream-and-stringescape.367925/
Total Initialization by Bribe @ https://www.hiveworkshop.com/threads/317099/
Optional requirements:
DebugUtils by Eikonium @ https://www.hiveworkshop.com/threads/330758/
Patch by Tomotz 1 Mar 2025:
- Added escaping to all characters unsupported by FileIO. Those include null terminator (for saving and loading),
and line feed, backslash, closing square bracket.
- Added optional parameter isLoadable to savefile, which defaults to true. If false, the file will have nicer format -
less character are escaped, and there are less extra characters added by FileIO. This also means there is more room for user
data in each Preload call. Such files will not be loadable with FileIO.Load.
--]]
local RAW_PREFIX = ']]i([['
local RAW_SUFFIX = ']])--[['
local MAX_PRELOAD_SIZE = 256
MAX_TEXT_SAVE = MAX_PRELOAD_SIZE
MAX_TEXT_LOAD = MAX_PRELOAD_SIZE - #RAW_PREFIX - #RAW_SUFFIX
local LOAD_ABILITY = FourCC('ANdc')
local LOAD_EMPTY_KEY = '!@#$, empty data'
local name = nil ---@type string?
-- carriage return seems to turn into new line when written and read back
FileIO_unsupportedLoadChars = {0, 10, 13, 92, 93} -- null terminator, line feed, carriage return, slash, closing square bracket
FileIO_unsupportedSaveChars = {0} -- only null terminator is not supported when saving. \ becomed \\ though, so the string becomes longer and we lose the last characters in the Preload
---@param filename any
---@param isLoadable any
local function open(filename, isLoadable)
-- turns out you can't save a file without an extension.
if Debug then Debug.assert(filename:find('.', 1, true), "FileIO: filename must have an extension") end
name = filename
PreloadGenClear()
if isLoadable then
Preload('")\nendfunction\n//!beginusercode\nlocal p={} local i=function(s) table.insert(p,s) end--[[')
end
end
local function write(s, isLoadable)
local maxSize = isLoadable and MAX_TEXT_LOAD or MAX_TEXT_SAVE
local prefix = isLoadable and RAW_PREFIX or ''
local suffix = isLoadable and RAW_SUFFIX or ''
local curPos = 1
while curPos < #s do
local chunk = s:sub(curPos, curPos + maxSize - 1)
local lastChar = #chunk
if not isLoadable then
-- handle \ characters which are escaped as \\ in preload (for loadable files, we replace the \ with unprintable char)
local _, numSlash = chunk:gsub("[\\]", "")
local curLen = lastChar + numSlash -- This is the actuall length the chunk will take in preload
while curLen > maxSize and lastChar > 0 do
local char = chunk:sub(lastChar, lastChar)
if char == '\\' then
curLen = curLen - 1
end
curLen = curLen - 1
lastChar = lastChar - 1
end
end
chunk = chunk:sub(1, lastChar)
Preload(prefix .. chunk .. suffix)
curPos = curPos + #chunk
end
end
local function close(isLoadable)
if isLoadable then
Preload(']]BlzSetAbilityTooltip(' ..
LOAD_ABILITY .. ', table.concat(p), 0)\n//!endusercode\nfunction a takes nothing returns nothing\n//')
end
PreloadGenEnd(name --[[@as string]])
name = nil
end
---@param filename string
---@param data string
---@param isLoadable boolean? -- Use false only if you never plan to load the file. Default is true.
-- This controls which characters are escaped and replaced. For loadable files, we must remove more characters.
local function savefile(filename, data, isLoadable)
if isLoadable == nil then
isLoadable = true
end
local unsupportedChars ---@type integer[]
if isLoadable then
unsupportedChars = FileIO_unsupportedLoadChars
else
unsupportedChars = FileIO_unsupportedSaveChars
end
local data2 = AddEscaping(data, unsupportedChars)
open(filename, isLoadable)
write(data2, isLoadable)
close(isLoadable)
end
---@param filename string
---@return string?
local function loadfile(filename)
local s = BlzGetAbilityTooltip(LOAD_ABILITY, 0)
BlzSetAbilityTooltip(LOAD_ABILITY, LOAD_EMPTY_KEY, 0)
Preloader(filename)
local loaded = BlzGetAbilityTooltip(LOAD_ABILITY, 0)
if loaded == LOAD_EMPTY_KEY then
return nil
end
return RemoveEscaping(loaded, FileIO_unsupportedLoadChars)
end
---@param filename string
---@param data string
---@param onFail nil | fun(filename:string, data:string, loadResult:string?)
---@return boolean
local function saveAsserted(filename, data, onFail)
savefile(filename, data)
local res = loadfile(filename)
if res == data then
return true
end
if onFail then
onFail(filename, data, res)
end
return false
end
FileIO = {
Save = savefile,
Load = loadfile,
SaveAsserted = saveAsserted,
}
OnInit.global(function()
FileIO.enabled = saveAsserted('TestFileIO.pld', 'FileIO is Enabled')
end)
if Debug then Debug.endFile() end
if Debug then Debug.beginFile("LibDeflate") end
--[[
Adaptation of LibDeflate v1.08
(C) ModdieMads @ https://www.hiveworkshop.com/members/moddiemads.310879/
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
LibDeflate
Pure Lua compressor and decompressor with high compression ratio using
DEFLATE/zlib format.
(C) 2018-2021 Haoqian He
zlib License
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source distribution.
License History:
1. GNU General Public License Version 3 in v1.0.0 and earlier versions.
2. GNU Lesser General Public License Version 3 in v1.0.1
3. the zlib License since v1.0.2
]]
do
LibDeflate = {}
-- localize Lua api for faster access.
-- this is highly unecessary, but here we go...
local assert = assert
local error = error
local pairs = pairs
local string_byte = string.byte
local string_char = string.char
local string_sub = string.sub
local table_concat = table.concat
local table_sort = table.sort
local math_fmod = math.fmod
local fmod = function(v,mod) return (math_fmod(v,mod) + .5)//1 end
-- Converts i to 2^i
-- This is used to implement bit left shift and bit right shift.
-- "x << y" in C: "x*_pow2[y]" in Lua
local _pow2 = {}
-- Converts any byte to a character, (0<=byte<=255)
local _byte_to_char = {}
-- _reverseBitsTbl[len][val] stores the bit reverse of
-- the number with bit length "len" and value "val"
-- For example, decimal number 6 with bits length 5 is binary 00110
-- It's reverse is binary 01100,
-- which is decimal 12 and 12 == _reverseBitsTbl[5][6]
-- 1<=len<=9, 0<=val<=2^len-1
-- The reason for 1<=len<=9 is that the max of min bitlen of huffman code
-- of a huffman alphabet is 9?
local _reverse_bits_tbl = {}
-- Convert a LZ77 length (3<=len<=258) to
-- a deflate literal,LZ77_length code (257<=code<=285)
local _length_to_deflate_code = {}
-- convert a LZ77 length (3<=len<=258) to
-- a deflate literal,LZ77_length code extra bits.
local _length_to_deflate_extra_bits = {}
-- Convert a LZ77 length (3<=len<=258) to
-- a deflate literal,LZ77_length code extra bit length.
local _length_to_deflate_extra_bitlen = {}
-- Convert a small LZ77 distance (1<=dist<=256) to a deflate code.
local _dist256_to_deflate_code = {}
-- Convert a small LZ77 distance (1<=dist<=256) to
-- a deflate distance code extra bits.
local _dist256_to_deflate_extra_bits = {}
-- Convert a small LZ77 distance (1<=dist<=256) to
-- a deflate distance code extra bit length.
local _dist256_to_deflate_extra_bitlen = {}
-- Convert a literal,LZ77_length deflate code to LZ77 base length
-- The key of the table is (code - 256), 257<=code<=285
local _literal_deflate_code_to_base_len =
{
3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31, 35, 43, 51, 59, 67,
83, 99, 115, 131, 163, 195, 227, 258
}
-- Convert a literal,LZ77_length deflate code to base LZ77 length extra bits
-- The key of the table is (code - 256), 257<=code<=285
local _literal_deflate_code_to_extra_bitlen =
{
0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5,
5, 5, 5, 0
}
-- Convert a distance deflate code to base LZ77 distance. (0<=code<=29)
local _dist_deflate_code_to_base_dist = {
[0] = 1,
2,
3,
4,
5,
7,
9,
13,
17,
25,
33,
49,
65,
97,
129,
193,
257,
385,
513,
769,
1025,
1537,
2049,
3073,
4097,
6145,
8193,
12289,
16385,
24577
}
-- Convert a distance deflate code to LZ77 bits length. (0<=code<=29)
local _dist_deflate_code_to_extra_bitlen =
{
[0] = 0,
0,
0,
0,
1,
1,
2,
2,
3,
3,
4,
4,
5,
5,
6,
6,
7,
7,
8,
8,
9,
9,
10,
10,
11,
11,
12,
12,
13,
13
}
-- The code order of the first huffman header in the dynamic deflate block.
-- See the page 12 of RFC1951
local _rle_codes_huffman_bitlen_order = {
16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
}
-- The following tables are used by fixed deflate block.
-- The value of these tables are assigned at the bottom of the source.
-- The huffman code of the literal,LZ77_length deflate codes,
-- in fixed deflate block.
local _fix_block_literal_huffman_code
-- Convert huffman code of the literal,LZ77_length to deflate codes,
-- in fixed deflate block.
local _fix_block_literal_huffman_to_deflate_code
-- The bit length of the huffman code of literal,LZ77_length deflate codes,
-- in fixed deflate block.
local _fix_block_literal_huffman_bitlen
-- The count of each bit length of the literal,LZ77_length deflate codes,
-- in fixed deflate block.
local _fix_block_literal_huffman_bitlen_count
-- The huffman code of the distance deflate codes,
-- in fixed deflate block.
local _fix_block_dist_huffman_code
-- Convert huffman code of the distance to deflate codes,
-- in fixed deflate block.
local _fix_block_dist_huffman_to_deflate_code
-- The bit length of the huffman code of the distance deflate codes,
-- in fixed deflate block.
local _fix_block_dist_huffman_bitlen
-- The count of each bit length of the huffman code of
-- the distance deflate codes,
-- in fixed deflate block.
local _fix_block_dist_huffman_bitlen_count
--- Calculate the Adler-32 checksum of the string. <br>
-- definition of Adler-32 checksum.
-- @param str [string] the input string to calcuate its Adler-32 checksum.
-- @return [integer] The Adler-32 checksum, which is greater or equal to 0,
-- and less than 2^32 (4294967296).
function LibDeflate.Adler32(str)
-- This function is loop unrolled by better performance.
--
-- Here is the minimum code:
local strlen = #str
local i = 1
local a = 1
local b = 0
while i <= strlen - 15 do
local x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x16 = string_byte(str, i, i + 15)
b = fmod(b + 16 * a + 16 * x1 + 15 * x2 + 14 * x3 + 13 * x4 + 12 * x5 + 11 * x6 +
10 * x7 + 9 * x8 + 8 * x9 + 7 * x10 + 6 * x11 + 5 * x12 + 4 * x13 + 3 *
x14 + 2 * x15 + x16, 65521)
a = fmod(a + x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12 + x13 +
x14 + x15 + x16, 65521)
i = i + 16
end
while (i <= strlen) do
local x = string_byte(str, i, i)
a = fmod((a + x), 65521)
b = fmod((b + a), 65521)
i = i + 1
end
return fmod((b * 65536 + a), 4294967296)
end
-- Compare adler32 checksum.
-- adler32 should be compared with a mod to avoid sign problem
-- 4072834167 (unsigned) is the same adler32 as -222133129
function LibDeflate.IsEqualAdler32(actual, expected)
return fmod(actual, 4294967296) == fmod(expected, 4294967296)
end
local _compression_level_configs = {
[0] = {false, nil, 0, 0, 0}, -- level 0, no compression
[1] = {false, nil, 4, 8, 4}, -- level 1, similar to zlib level 1
[2] = {false, nil, 5, 18, 8}, -- level 2, similar to zlib level 2
[3] = {false, nil, 6, 32, 32}, -- level 3, similar to zlib level 3
[4] = {true, 4, 4, 16, 16}, -- level 4, similar to zlib level 4
[5] = {true, 8, 16, 32, 32}, -- level 5, similar to zlib level 5
[6] = {true, 8, 16, 128, 128}, -- level 6, similar to zlib level 6
[7] = {true, 8, 32, 128, 256}, -- (SLOW) level 7, similar to zlib level 7
[8] = {true, 32, 128, 258, 1024}, -- (SLOW) level 8,similar to zlib level 8
[9] = {true, 32, 258, 258, 4096}
-- (VERY SLOW) level 9, similar to zlib level 9
}
-- partial flush to save memory
local _FLUSH_MODE_MEMORY_CLEANUP = 0
-- full flush with partial bytes
local _FLUSH_MODE_OUTPUT = 1
-- write bytes to get to byte boundary
local _FLUSH_MODE_BYTE_BOUNDARY = 2
-- no flush, just get num of bits written so far
local _FLUSH_MODE_NO_FLUSH = 3
--[[
Create an empty writer to easily write stuffs as the unit of bits.
Return values:
1. WriteBits(code, bitlen):
2. WriteString(str):
3. Flush(mode):
--]]
local function CreateWriter()
local buffer_size = 0
local cache = 0
local cache_bitlen = 0
local total_bitlen = 0
local buffer = {}
-- When buffer is big enough, flush into result_buffer to save memory.
local result_buffer = {}
-- Write bits with value "value" and bit length of "bitlen" into writer.
-- @param value: The value being written
-- @param bitlen: The bit length of "value"
-- @return nil
local function WriteBits(value, bitlen)
cache = cache + value * _pow2[cache_bitlen]
cache_bitlen = cache_bitlen + bitlen
total_bitlen = total_bitlen + bitlen
-- Only bulk to buffer every 4 bytes. This is quicker.
if cache_bitlen >= 16 then
buffer_size = buffer_size + 1
buffer[buffer_size] = _byte_to_char[cache&255] .. _byte_to_char[(((cache - (cache&255))) >> 8)&255]
local rshift_mask = _pow2[16 - cache_bitlen + bitlen]
cache = (value - (value&(rshift_mask-1))) >> (16 - cache_bitlen + bitlen)
cache_bitlen = cache_bitlen - 16
end
end
-- Write the entire string into the writer.
-- @param str The string being written
-- @return nil
local function WriteString(str)
for _ = 1, cache_bitlen, 8 do
buffer_size = buffer_size + 1
buffer[buffer_size] = string_char((cache&255))
cache = (cache - (cache&255)) >> 8
end
cache_bitlen = 0
buffer_size = buffer_size + 1
buffer[buffer_size] = str
total_bitlen = total_bitlen + #str * 8
end
-- Flush current stuffs in the writer and return it.
-- This operation will free most of the memory.
-- @param mode See the descrtion of the constant and the source code.
-- @return The total number of bits stored in the writer right now.
-- for byte boundary mode, it includes the padding bits.
-- for output mode, it does not include padding bits.
-- @return Return the outputs if mode is output.
local function FlushWriter(mode)
if mode == _FLUSH_MODE_NO_FLUSH then return total_bitlen end
if mode == _FLUSH_MODE_OUTPUT or mode == _FLUSH_MODE_BYTE_BOUNDARY then
-- Full flush, also output cache.
-- Need to pad some bits if cache_bitlen is not multiple of 8.
local padding_bitlen = ((8 - (cache_bitlen&7))&7)
if cache_bitlen > 0 then
-- padding with all 1 bits, mainly because "\000" is not
-- good to be tranmitted. I do this so "\000" is a little bit
-- less frequent.
cache = cache - _pow2[cache_bitlen] + _pow2[cache_bitlen + padding_bitlen]
for _ = 1, cache_bitlen, 8 do
buffer_size = buffer_size + 1
buffer[buffer_size] = _byte_to_char[(cache&255)]
cache = (cache - (cache&255)) >> 8
end
cache = 0
cache_bitlen = 0
end
if mode == _FLUSH_MODE_BYTE_BOUNDARY then
total_bitlen = total_bitlen + padding_bitlen
return total_bitlen
end
end
local flushed = table_concat(buffer)
buffer = {}
buffer_size = 0
result_buffer[#result_buffer + 1] = flushed
if mode == _FLUSH_MODE_MEMORY_CLEANUP then
return total_bitlen
else
return total_bitlen, table_concat(result_buffer)
end
end
return WriteBits, WriteString, FlushWriter
end
-- Push an element into a max heap
-- @param heap A max heap whose max element is at index 1.
-- @param e The element to be pushed. Assume element "e" is a table
-- and comparison is done via its first entry e[1]
-- @param heap_size current number of elements in the heap.
-- NOTE: There may be some garbage stored in
-- heap[heap_size+1], heap[heap_size+2], etc..
-- @return nil
local function MinHeapPush(heap, e, heap_size)
heap_size = heap_size + 1
heap[heap_size] = e
local value = e[1]
local pos = heap_size
local parent_pos = (pos - (pos&1)) >> 1
while (parent_pos >= 1 and heap[parent_pos][1] > value) do
local t = heap[parent_pos]
heap[parent_pos] = e
heap[pos] = t
pos = parent_pos
parent_pos = (parent_pos - (parent_pos&1)) >> 1
end
end
-- Pop an element from a max heap
-- @param heap A max heap whose max element is at index 1.
-- @param heap_size current number of elements in the heap.
-- @return the poped element
-- Note: This function does not change table size of "heap" to save CPU time.
local function MinHeapPop(heap, heap_size)
local top = heap[1]
local e = heap[heap_size]
local value = e[1]
heap[1] = e
heap[heap_size] = top
heap_size = heap_size - 1
local pos = 1
local left_child_pos = pos * 2
local right_child_pos = left_child_pos + 1
while (left_child_pos <= heap_size) do
local left_child = heap[left_child_pos]
if (right_child_pos <= heap_size and heap[right_child_pos][1] < left_child[1]) then
local right_child = heap[right_child_pos]
if right_child[1] < value then
heap[right_child_pos] = e
heap[pos] = right_child
pos = right_child_pos
left_child_pos = pos * 2
right_child_pos = left_child_pos + 1
else
break
end
else
if left_child[1] < value then
heap[left_child_pos] = e
heap[pos] = left_child
pos = left_child_pos
left_child_pos = pos * 2
right_child_pos = left_child_pos + 1
else
break
end
end
end
return top
end
-- Deflate defines a special huffman tree, which is unique once the bit length
-- of huffman code of all symbols are known.
-- @param bitlen_count Number of symbols with a specific bitlen
-- @param symbol_bitlen The bit length of a symbol
-- @param max_symbol The max symbol among all symbols,
-- which is (number of symbols - 1)
-- @param max_bitlen The max huffman bit length among all symbols.
-- @return The huffman code of all symbols.
local function GetHuffmanCodeFromBitlen(bitlen_counts, symbol_bitlens, max_symbol, max_bitlen)
local huffman_code = 0
local next_codes = {}
local symbol_huffman_codes = {}
for bitlen = 1, max_bitlen do
huffman_code = (huffman_code + (bitlen_counts[bitlen - 1] or 0)) * 2
next_codes[bitlen] = huffman_code
end
for symbol = 0, max_symbol do
local bitlen = symbol_bitlens[symbol]
if bitlen then
huffman_code = next_codes[bitlen]
next_codes[bitlen] = huffman_code + 1
-- Reverse the bits of huffman code,
-- because most signifant bits of huffman code
-- is stored first into the compressed data.
-- @see RFC1951 Page5 Section 3.1.1
if bitlen <= 9 then -- Have cached reverse for small bitlen.
symbol_huffman_codes[symbol] = _reverse_bits_tbl[bitlen][huffman_code]
else
local reverse = 0
for _ = 1, bitlen do
reverse = reverse - (reverse&1) + ((((reverse&1) == 1) or ((huffman_code&1)) == 1) and 1 or 0)
huffman_code = (huffman_code - (huffman_code&1)) >> 1
reverse = reverse * 2
end
symbol_huffman_codes[symbol] = (reverse - (reverse&1)) >> 1
end
end
end
return symbol_huffman_codes
end
-- A helper function to sort heap elements
-- a[1], b[1] is the huffman frequency
-- a[2], b[2] is the symbol value.
local function SortByFirstThenSecond(a, b)
return a[1] < b[1] or (a[1] == b[1] and a[2] < b[2])
end
-- Calculate the huffman bit length and huffman code.
-- @param symbol_count: A table whose table key is the symbol, and table value
-- is the symbol frenquency (nil means 0 frequency).
-- @param max_bitlen: See description of return value.
-- @param max_symbol: The maximum symbol
-- @return a table whose key is the symbol, and the value is the huffman bit
-- bit length. We guarantee that all bit length <= max_bitlen.
-- For 0<=symbol<=max_symbol, table value could be nil if the frequency
-- of the symbol is 0 or nil.
-- @return a table whose key is the symbol, and the value is the huffman code.
-- @return a number indicating the maximum symbol whose bitlen is not 0.
local function GetHuffmanBitlenAndCode(symbol_counts, max_bitlen, max_symbol)
local heap_size
local max_non_zero_bitlen_symbol = -1
local leafs = {}
local heap = {}
local symbol_bitlens = {}
local symbol_codes = {}
local bitlen_counts = {}
--[[
tree[1]: weight, temporarily used as parent and bitLengths
tree[2]: symbol
tree[3]: left child
tree[4]: right child
--]]
local number_unique_symbols = 0
for symbol, count in pairs(symbol_counts) do
number_unique_symbols = number_unique_symbols + 1
leafs[number_unique_symbols] = {count, symbol}
end
if (number_unique_symbols == 0) then
-- no code.
return {}, {}, -1
elseif (number_unique_symbols == 1) then
-- Only one code. In this case, its huffman code
-- needs to be assigned as 0, and bit length is 1.
-- This is the only case that the return result
-- represents an imcomplete huffman tree.
local symbol = leafs[1][2]
symbol_bitlens[symbol] = 1
symbol_codes[symbol] = 0
return symbol_bitlens, symbol_codes, symbol
else
table_sort(leafs, SortByFirstThenSecond)
heap_size = number_unique_symbols
for i = 1, heap_size do heap[i] = leafs[i] end
while (heap_size > 1) do
-- Note: pop does not change table size of heap
local leftChild = MinHeapPop(heap, heap_size)
heap_size = heap_size - 1
local rightChild = MinHeapPop(heap, heap_size)
heap_size = heap_size - 1
local newNode = {leftChild[1] + rightChild[1], -1, leftChild, rightChild}
MinHeapPush(heap, newNode, heap_size)
heap_size = heap_size + 1
end
-- Number of leafs whose bit length is greater than max_len.
local number_bitlen_overflow = 0
-- Calculate bit length of all nodes
local fifo = {heap[1], 0, 0, 0} -- preallocate some spaces.
local fifo_size = 1
local index = 1
heap[1][1] = 0
while (index <= fifo_size) do -- Breath first search
local e = fifo[index]
local bitlen = e[1]
local symbol = e[2]
local left_child = e[3]
local right_child = e[4]
if left_child then
fifo_size = fifo_size + 1
fifo[fifo_size] = left_child
left_child[1] = bitlen + 1
end
if right_child then
fifo_size = fifo_size + 1
fifo[fifo_size] = right_child
right_child[1] = bitlen + 1
end
index = index + 1
if (bitlen > max_bitlen) then
number_bitlen_overflow = number_bitlen_overflow + 1
bitlen = max_bitlen
end
if symbol >= 0 then
symbol_bitlens[symbol] = bitlen
max_non_zero_bitlen_symbol = (symbol > max_non_zero_bitlen_symbol) and symbol or max_non_zero_bitlen_symbol
bitlen_counts[bitlen] = (bitlen_counts[bitlen] or 0) + 1
end
end
-- Resolve bit length overflow
-- @see ZLib/trees.c:gen_bitlen(s, desc), for reference
if (number_bitlen_overflow > 0) then
repeat
local bitlen = max_bitlen - 1
while ((bitlen_counts[bitlen] or 0) == 0) do bitlen = bitlen - 1 end
-- move one leaf down the tree
bitlen_counts[bitlen] = bitlen_counts[bitlen] - 1
-- move one overflow item as its brother
bitlen_counts[bitlen + 1] = (bitlen_counts[bitlen + 1] or 0) + 2
bitlen_counts[max_bitlen] = bitlen_counts[max_bitlen] - 1
number_bitlen_overflow = number_bitlen_overflow - 2
until (number_bitlen_overflow <= 0)
index = 1
for bitlen = max_bitlen, 1, -1 do
local n = bitlen_counts[bitlen] or 0
while (n > 0) do
local symbol = leafs[index][2]
symbol_bitlens[symbol] = bitlen
n = n - 1
index = index + 1
end
end
end
symbol_codes = GetHuffmanCodeFromBitlen(bitlen_counts, symbol_bitlens, max_symbol, max_bitlen)
return symbol_bitlens, symbol_codes, max_non_zero_bitlen_symbol
end
end
-- Calculate the first huffman header in the dynamic huffman block
-- @see RFC1951 Page 12
-- @param lcode_bitlen: The huffman bit length of literal,LZ77_length.
-- @param max_non_zero_bitlen_lcode: The maximum literal,LZ77_length symbol
-- whose huffman bit length is not zero.
-- @param dcode_bitlen: The huffman bit length of LZ77 distance.
-- @param max_non_zero_bitlen_dcode: The maximum LZ77 distance symbol
-- whose huffman bit length is not zero.
-- @return The run length encoded codes.
-- @return The extra bits. One entry for each rle code that needs extra bits.
-- (code == 16 or 17 or 18).
-- @return The count of appearance of each rle codes.
local function RunLengthEncodeHuffmanBitlen(lcode_bitlens, max_non_zero_bitlen_lcode, dcode_bitlens, max_non_zero_bitlen_dcode)
local rle_code_tblsize = 0
local rle_codes = {}
local rle_code_counts = {}
local rle_extra_bits_tblsize = 0
local rle_extra_bits = {}
local prev = nil
local count = 0
-- If there is no distance code, assume one distance code of bit length 0.
-- RFC1951: One distance code of zero bits means that
-- there are no distance codes used at all (the data is all literals).
max_non_zero_bitlen_dcode = (max_non_zero_bitlen_dcode < 0) and 0 or max_non_zero_bitlen_dcode
local max_code = max_non_zero_bitlen_lcode + max_non_zero_bitlen_dcode + 1
for code = 0, max_code + 1 do
local len = (code <= max_non_zero_bitlen_lcode) and
(lcode_bitlens[code] or 0) or ((code <= max_code) and
(dcode_bitlens[code - max_non_zero_bitlen_lcode - 1] or 0) or
nil)
if len == prev then
count = count + 1
if len ~= 0 and count == 6 then
rle_code_tblsize = rle_code_tblsize + 1
rle_codes[rle_code_tblsize] = 16
rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
rle_extra_bits[rle_extra_bits_tblsize] = 3
rle_code_counts[16] = (rle_code_counts[16] or 0) + 1
count = 0
elseif len == 0 and count == 138 then
rle_code_tblsize = rle_code_tblsize + 1
rle_codes[rle_code_tblsize] = 18
rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
rle_extra_bits[rle_extra_bits_tblsize] = 127
rle_code_counts[18] = (rle_code_counts[18] or 0) + 1
count = 0
end
else
if count == 1 then
rle_code_tblsize = rle_code_tblsize + 1
rle_codes[rle_code_tblsize] = prev
rle_code_counts[prev] = (rle_code_counts[prev] or 0) + 1
elseif count == 2 then
rle_code_tblsize = rle_code_tblsize + 1
rle_codes[rle_code_tblsize] = prev
rle_code_tblsize = rle_code_tblsize + 1
rle_codes[rle_code_tblsize] = prev
rle_code_counts[prev] = (rle_code_counts[prev] or 0) + 2
elseif count >= 3 then
rle_code_tblsize = rle_code_tblsize + 1
local rleCode = (prev ~= 0) and 16 or (count <= 10 and 17 or 18)
rle_codes[rle_code_tblsize] = rleCode
rle_code_counts[rleCode] = (rle_code_counts[rleCode] or 0) + 1
rle_extra_bits_tblsize = rle_extra_bits_tblsize + 1
rle_extra_bits[rle_extra_bits_tblsize] =
(count <= 10) and (count - 3) or (count - 11)
end
prev = len
if len and len ~= 0 then
rle_code_tblsize = rle_code_tblsize + 1
rle_codes[rle_code_tblsize] = len
rle_code_counts[len] = (rle_code_counts[len] or 0) + 1
count = 0
else
count = 1
end
end
end
return rle_codes, rle_extra_bits, rle_code_counts
end
-- Load the string into a table, in order to speed up LZ77.
-- Loop unrolled 16 times to speed this function up.
-- @param str The string to be loaded.
-- @param t The load destination
-- @param start str[index] will be the first character to be loaded.
-- @param end str[index] will be the last character to be loaded
-- @param offset str[index] will be loaded into t[index-offset]
-- @return t
local function LoadStringToTable(str, t, start, stop, offset)
local i = start - offset
while i <= stop - 15 - offset do
t[i], t[i + 1], t[i + 2], t[i + 3], t[i + 4], t[i + 5], t[i + 6], t[i + 7], t[i + 8], t[i + 9], t[i + 10],
t[i + 11], t[i + 12], t[i + 13], t[i + 14], t[i +15] = string_byte(str, i + offset, i + 15 + offset)
i = i + 16
end
while (i <= stop - offset) do
t[i] = string_byte(str, i + offset, i + offset)
i = i + 1
end
return t
end
-- Do LZ77 process. This function uses the majority of the CPU time.
-- @see zlib/deflate.c:deflate_fast(), zlib/deflate.c:deflate_slow()
-- This function uses the algorithms used above. You should read the
-- algorithm.txt above to understand what is the hash function and the
-- lazy evaluation.
--
-- The special optimization used here is hash functions used here.
-- The hash function is just the multiplication of the three consective
-- characters. So if the hash matches, it guarantees 3 characters are matched.
-- This optimization can be implemented because Lua table is a hash table.
--
-- @param level integer that describes compression level.
-- @param string_table table that stores the value of string to be compressed.
-- The index of this table starts from 1.
-- The caller needs to make sure all values needed by this function
-- are loaded.
-- Assume "str" is the origin input string into the compressor
-- str[block_start]..str[block_end+3] needs to be loaded into
-- string_table[block_start-offset]..string_table[block_end-offset]
-- If dictionary is presented, the last 258 bytes of the dictionary
-- needs to be loaded into sing_table[-257..0]
-- (See more in the description of offset.)
-- @param hash_tables. The table key is the hash value (0<=hash<=16777216=256^3)
-- The table value is an array0 that stores the indexes of the
-- input data string to be compressed, such that
-- hash == str[index]*str[index+1]*str[index+2]
-- Indexes are ordered in this array.
-- @param block_start The indexes of the input data string to be compressed.
-- that starts the LZ77 block.
-- @param block_end The indexes of the input data string to be compressed.
-- that stores the LZ77 block.
-- @param offset str[index] is stored in string_table[index-offset],
-- This offset is mainly an optimization to limit the index
-- of string_table, so lua can access this table quicker.
-- @param dictionary See LibDeflate:CreateDictionary
-- @return literal,LZ77_length deflate codes.
-- @return the extra bits of literal,LZ77_length deflate codes.
-- @return the count of each literal,LZ77 deflate code.
-- @return LZ77 distance deflate codes.
-- @return the extra bits of LZ77 distance deflate codes.
-- @return the count of each LZ77 distance deflate code.
local function GetBlockLZ77Result(level, string_table, hash_tables, block_start, block_end, offset)--, dictionary)
local config = _compression_level_configs[level]
local config_use_lazy, config_good_prev_length, config_max_lazy_match,
config_nice_length, config_max_hash_chain = config[1], config[2], config[3], config[4], config[5]
local config_max_insert_length = (not config_use_lazy) and config_max_lazy_match or 2147483646
local config_good_hash_chain = (config_max_hash_chain - ((config_max_hash_chain&3) >> 2))
local hash
local dict_hash_tables
local dict_string_table
local dict_string_len_plus3 = 3
hash = (string_table[block_start - offset] or 0) * 256 + (string_table[block_start + 1 - offset] or 0)
local lcodes = {}
local lcode_tblsize = 0
local lcodes_counts = {}
local dcodes = {}
local dcodes_tblsize = 0
local dcodes_counts = {}
local lextra_bits = {}
local lextra_bits_tblsize = 0
local dextra_bits = {}
local dextra_bits_tblsize = 0
local match_available = false
local prev_len
local prev_dist
local cur_len = 0
local cur_dist = 0
local index = block_start
local index_end = block_end + (config_use_lazy and 1 or 0)
-- the zlib source code writes separate code for lazy evaluation and
-- not lazy evaluation, which is easier to understand.
-- I put them together, so it is a bit harder to understand.
-- because I think this is easier for me to maintain it.
while (index <= index_end) do
local string_table_index = index - offset
local offset_minus_three = offset - 3
prev_len = cur_len
prev_dist = cur_dist
cur_len = 0
hash = (hash * 256 + (string_table[string_table_index + 2] or 0)) & 16777215
local chain_index
local cur_chain
local hash_chain = hash_tables[hash]
local chain_old_size
if not hash_chain then
chain_old_size = 0
hash_chain = {}
hash_tables[hash] = hash_chain
if dict_hash_tables then
cur_chain = dict_hash_tables[hash]
chain_index = cur_chain and #cur_chain or 0
else
chain_index = 0
end
else
chain_old_size = #hash_chain
cur_chain = hash_chain
chain_index = chain_old_size
end
if index <= block_end then hash_chain[chain_old_size + 1] = index end
if (chain_index > 0 and index + 2 <= block_end and (not config_use_lazy or prev_len < config_max_lazy_match)) then
local depth = (config_use_lazy and prev_len >= config_good_prev_length) and config_good_hash_chain or config_max_hash_chain
local max_len_minus_one = block_end - index
max_len_minus_one = (max_len_minus_one >= 257) and 257 or max_len_minus_one
max_len_minus_one = max_len_minus_one + string_table_index
local string_table_index_plus_three = string_table_index + 3
while chain_index >= 1 and depth > 0 do
local prev = cur_chain[chain_index]
if index - prev > 32768 then break end
if prev < index then
local sj = string_table_index_plus_three
if prev >= -257 then
local pj = prev - offset_minus_three
while (sj <= max_len_minus_one and string_table[pj] == string_table[sj]) do
sj = sj + 1
pj = pj + 1
end
else
local pj = dict_string_len_plus3 + prev
end
local j = sj - string_table_index
if j > cur_len then
cur_len = j
cur_dist = index - prev
end
if cur_len >= config_nice_length then break end
end
chain_index = chain_index - 1
depth = depth - 1
if chain_index == 0 and prev > 0 and dict_hash_tables then
cur_chain = dict_hash_tables[hash]
chain_index = cur_chain and #cur_chain or 0
end
end
end
if not config_use_lazy then prev_len, prev_dist = cur_len, cur_dist end
if ((not config_use_lazy or match_available) and (prev_len > 3 or (prev_len == 3 and prev_dist < 4096)) and cur_len <= prev_len) then
local code = _length_to_deflate_code[prev_len]
local length_extra_bits_bitlen = _length_to_deflate_extra_bitlen[prev_len]
local dist_code, dist_extra_bits_bitlen, dist_extra_bits
if prev_dist <= 256 then -- have cached code for small distance.
dist_code = _dist256_to_deflate_code[prev_dist]
dist_extra_bits = _dist256_to_deflate_extra_bits[prev_dist]
dist_extra_bits_bitlen = _dist256_to_deflate_extra_bitlen[prev_dist]
else
dist_code = 16
dist_extra_bits_bitlen = 7
local a = 384
local b = 512
while true do
if prev_dist <= a then
dist_extra_bits = (prev_dist - (b >> 1) - 1) & ((b >> 2)-1)
break
elseif prev_dist <= b then
dist_extra_bits = (prev_dist - (b >> 1) - 1) & ((b >> 2)-1)
dist_code = dist_code + 1
break
else
dist_code = dist_code + 2
dist_extra_bits_bitlen = dist_extra_bits_bitlen + 1
a = a * 2
b = b * 2
end
end
end
lcode_tblsize = lcode_tblsize + 1
lcodes[lcode_tblsize] = code
lcodes_counts[code] = (lcodes_counts[code] or 0) + 1
dcodes_tblsize = dcodes_tblsize + 1
dcodes[dcodes_tblsize] = dist_code
dcodes_counts[dist_code] = (dcodes_counts[dist_code] or 0) + 1
if length_extra_bits_bitlen > 0 then
local lenExtraBits = _length_to_deflate_extra_bits[prev_len]
lextra_bits_tblsize = lextra_bits_tblsize + 1
lextra_bits[lextra_bits_tblsize] = lenExtraBits
end
if dist_extra_bits_bitlen > 0 then
dextra_bits_tblsize = dextra_bits_tblsize + 1
dextra_bits[dextra_bits_tblsize] = dist_extra_bits
end
for i = index + 1, index + prev_len - (config_use_lazy and 2 or 1) do
hash = (hash * 256 + (string_table[i - offset + 2] or 0)) & 16777215
if prev_len <= config_max_insert_length then
hash_chain = hash_tables[hash]
if not hash_chain then
hash_chain = {}
hash_tables[hash] = hash_chain
end
hash_chain[#hash_chain + 1] = i
end
end
index = index + prev_len - (config_use_lazy and 1 or 0)
match_available = false
elseif (not config_use_lazy) or match_available then
local code = string_table[config_use_lazy and (string_table_index - 1) or string_table_index]
lcode_tblsize = lcode_tblsize + 1
lcodes[lcode_tblsize] = code
lcodes_counts[code] = (lcodes_counts[code] or 0) + 1
index = index + 1
else
match_available = true
index = index + 1
end
end
-- Write "end of block" symbol
lcode_tblsize = lcode_tblsize + 1
lcodes[lcode_tblsize] = 256
lcodes_counts[256] = (lcodes_counts[256] or 0) + 1
return lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits, dcodes_counts
end
-- Get the header data of dynamic block.
-- @param lcodes_count The count of each literal,LZ77_length codes.
-- @param dcodes_count The count of each Lz77 distance codes.
-- @return a lots of stuffs.
-- @see RFC1951 Page 12
local function GetBlockDynamicHuffmanHeader(lcodes_counts, dcodes_counts)
local lcodes_huffman_bitlens, lcodes_huffman_codes, max_non_zero_bitlen_lcode = GetHuffmanBitlenAndCode(lcodes_counts, 15, 285)
local dcodes_huffman_bitlens, dcodes_huffman_codes, max_non_zero_bitlen_dcode = GetHuffmanBitlenAndCode(dcodes_counts, 15, 29)
local rle_deflate_codes, rle_extra_bits, rle_codes_counts =
RunLengthEncodeHuffmanBitlen(lcodes_huffman_bitlens,
max_non_zero_bitlen_lcode,
dcodes_huffman_bitlens,
max_non_zero_bitlen_dcode)
local rle_codes_huffman_bitlens, rle_codes_huffman_codes = GetHuffmanBitlenAndCode(rle_codes_counts, 7, 18)
local HCLEN = 0
for i = 1, 19 do
local symbol = _rle_codes_huffman_bitlen_order[i]
local length = rle_codes_huffman_bitlens[symbol] or 0
if length ~= 0 then HCLEN = i end
end
HCLEN = HCLEN - 4
local HLIT = max_non_zero_bitlen_lcode + 1 - 257
local HDIST = max_non_zero_bitlen_dcode + 1 - 1
if HDIST < 0 then HDIST = 0 end
return HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens, rle_codes_huffman_codes,
rle_deflate_codes, rle_extra_bits, lcodes_huffman_bitlens,
lcodes_huffman_codes, dcodes_huffman_bitlens, dcodes_huffman_codes
end
-- Get the size of dynamic block without writing any bits into the writer.
-- @param ... Read the source code of GetBlockDynamicHuffmanHeader()
-- @return the bit length of the dynamic block
local function GetDynamicHuffmanBlockSize(lcodes, dcodes, HCLEN,
rle_codes_huffman_bitlens,
rle_deflate_codes,
lcodes_huffman_bitlens,
dcodes_huffman_bitlens)
local block_bitlen = 17 -- 1+2+5+5+4
block_bitlen = block_bitlen + (HCLEN + 4) * 3
for i = 1, #rle_deflate_codes do
local code = rle_deflate_codes[i]
block_bitlen = block_bitlen + rle_codes_huffman_bitlens[code]
if code >= 16 then
block_bitlen = block_bitlen + ((code == 16) and 2 or (code == 17 and 3 or 7))
end
end
local length_code_count = 0
for i = 1, #lcodes do
local code = lcodes[i]
local huffman_bitlen = lcodes_huffman_bitlens[code]
block_bitlen = block_bitlen + huffman_bitlen
if code > 256 then -- Length code
length_code_count = length_code_count + 1
if code > 264 and code < 285 then -- Length code with extra bits
local extra_bits_bitlen = _literal_deflate_code_to_extra_bitlen[code - 256]
block_bitlen = block_bitlen + extra_bits_bitlen
end
local dist_code = dcodes[length_code_count]
local dist_huffman_bitlen = dcodes_huffman_bitlens[dist_code]
block_bitlen = block_bitlen + dist_huffman_bitlen
if dist_code > 3 then -- dist code with extra bits
local dist_extra_bits_bitlen = ((dist_code - (dist_code&1)) >> 1) - 1
block_bitlen = block_bitlen + dist_extra_bits_bitlen
end
end
end
return block_bitlen
end
-- Write dynamic block.
-- @param ... Read the source code of GetBlockDynamicHuffmanHeader()
local function CompressDynamicHuffmanBlock(WriteBits, is_last_block, lcodes,
lextra_bits, dcodes, dextra_bits,
HLIT, HDIST, HCLEN,
rle_codes_huffman_bitlens,
rle_codes_huffman_codes,
rle_deflate_codes, rle_extra_bits,
lcodes_huffman_bitlens,
lcodes_huffman_codes,
dcodes_huffman_bitlens,
dcodes_huffman_codes)
WriteBits(is_last_block and 1 or 0, 1) -- Last block identifier
WriteBits(2, 2) -- Dynamic Huffman block identifier
WriteBits(HLIT, 5)
WriteBits(HDIST, 5)
WriteBits(HCLEN, 4)
for i = 1, HCLEN + 4 do
local symbol = _rle_codes_huffman_bitlen_order[i]
local length = rle_codes_huffman_bitlens[symbol] or 0
WriteBits(length, 3)
end
local rleExtraBitsIndex = 1
for i = 1, #rle_deflate_codes do
local code = rle_deflate_codes[i]
WriteBits(rle_codes_huffman_codes[code], rle_codes_huffman_bitlens[code])
if code >= 16 then
local extraBits = rle_extra_bits[rleExtraBitsIndex]
WriteBits(extraBits, (code == 16) and 2 or (code == 17 and 3 or 7))
rleExtraBitsIndex = rleExtraBitsIndex + 1
end
end
local length_code_count = 0
local length_code_with_extra_count = 0
local dist_code_with_extra_count = 0
for i = 1, #lcodes do
local deflate_codee = lcodes[i]
local huffman_code = lcodes_huffman_codes[deflate_codee]
local huffman_bitlen = lcodes_huffman_bitlens[deflate_codee]
WriteBits(huffman_code, huffman_bitlen)
if deflate_codee > 256 then -- Length code
length_code_count = length_code_count + 1
if deflate_codee > 264 and deflate_codee < 285 then
-- Length code with extra bits
length_code_with_extra_count = length_code_with_extra_count + 1
local extra_bits = lextra_bits[length_code_with_extra_count]
local extra_bits_bitlen = _literal_deflate_code_to_extra_bitlen[deflate_codee - 256]
WriteBits(extra_bits, extra_bits_bitlen)
end
-- Write distance code
local dist_deflate_code = dcodes[length_code_count]
local dist_huffman_code = dcodes_huffman_codes[dist_deflate_code]
local dist_huffman_bitlen = dcodes_huffman_bitlens[dist_deflate_code]
WriteBits(dist_huffman_code, dist_huffman_bitlen)
if dist_deflate_code > 3 then -- dist code with extra bits
dist_code_with_extra_count = dist_code_with_extra_count + 1
local dist_extra_bits = dextra_bits[dist_code_with_extra_count]
local dist_extra_bits_bitlen = ((dist_deflate_code - (dist_deflate_code&1)) >> 1) - 1
WriteBits(dist_extra_bits, dist_extra_bits_bitlen)
end
end
end
end
-- Get the size of fixed block without writing any bits into the writer.
-- @param lcodes literal,LZ77_length deflate codes
-- @param decodes LZ77 distance deflate codes
-- @return the bit length of the fixed block
local function GetFixedHuffmanBlockSize(lcodes, dcodes)
local block_bitlen = 3
local length_code_count = 0
for i = 1, #lcodes do
local code = lcodes[i]
local huffman_bitlen = _fix_block_literal_huffman_bitlen[code]
block_bitlen = block_bitlen + huffman_bitlen
if code > 256 then -- Length code
length_code_count = length_code_count + 1
if code > 264 and code < 285 then -- Length code with extra bits
local extra_bits_bitlen = _literal_deflate_code_to_extra_bitlen[code - 256]
block_bitlen = block_bitlen + extra_bits_bitlen
end
local dist_code = dcodes[length_code_count]
block_bitlen = block_bitlen + 5
if dist_code > 3 then -- dist code with extra bits
local dist_extra_bits_bitlen = ((dist_code - (dist_code&1)) >> 1) - 1
block_bitlen = block_bitlen + dist_extra_bits_bitlen
end
end
end
return block_bitlen
end
-- Get the size of store block without writing any bits into the writer.
-- @param block_start The start index of the origin input string
-- @param block_end The end index of the origin input string
-- @param Total bit lens had been written into the compressed result before,
-- because store block needs to shift to byte boundary.
-- @return the bit length of the fixed block
local function GetStoreBlockSize(block_start, block_end, total_bitlen)
assert(block_end - block_start + 1 <= 65535)
local block_bitlen = 3
total_bitlen = total_bitlen + 3
local padding_bitlen = ((8 - (total_bitlen&7))&7)
block_bitlen = block_bitlen + padding_bitlen
block_bitlen = block_bitlen + 16
block_bitlen = block_bitlen + (block_end - block_start + 1) * 8
return block_bitlen
end
-- Do the deflate
-- Currently using a simple way to determine the block size
-- (This is why the compression ratio is little bit worse than zlib when
-- the input size is very large
-- The first block is 64KB, the following block is 32KB.
-- After each block, there is a memory cleanup operation.
-- This is not a fast operation, but it is needed to save memory usage, so
-- the memory usage does not grow unboundly. If the data size is less than
-- 64KB, then memory cleanup won't happen.
-- This function determines whether to use store,fixed,dynamic blocks by
-- calculating the block size of each block type and chooses the smallest one.
local function Deflate( WriteBits, WriteString, FlushWriter, str)
local string_table = {}
local hash_tables = {}
local is_last_block = nil
local block_start
local block_end
local bitlen_written
local total_bitlen = FlushWriter(_FLUSH_MODE_NO_FLUSH)
local strlen = #str
local offset
local level = strlen < 2048 and 7 or 3
while not is_last_block do
if not block_start then
block_start = 1
block_end = 32 * 1024 - 1
offset = 0
else
block_start = block_end + 1
block_end = block_end + 16 * 1024
offset = block_start - 16 * 1024 - 1
end
if block_end >= strlen then
block_end = strlen
is_last_block = true
else
is_last_block = false
end
local lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits, dcodes_counts
local HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens,
rle_codes_huffman_codes, rle_deflate_codes, rle_extra_bits,
lcodes_huffman_bitlens, lcodes_huffman_codes, dcodes_huffman_bitlens,
dcodes_huffman_codes
local dynamic_block_bitlen
local fixed_block_bitlen
local store_block_bitlen
if level ~= 0 then
-- GetBlockLZ77 needs block_start to block_end+3 to be loaded.
LoadStringToTable(str, string_table, block_start, block_end + 3, offset)
lcodes, lextra_bits, lcodes_counts, dcodes, dextra_bits, dcodes_counts =
GetBlockLZ77Result(level, string_table, hash_tables, block_start, block_end, offset)
-- LuaFormatter off
HLIT, HDIST, HCLEN, rle_codes_huffman_bitlens, rle_codes_huffman_codes, rle_deflate_codes,
rle_extra_bits, lcodes_huffman_bitlens, lcodes_huffman_codes, dcodes_huffman_bitlens, dcodes_huffman_codes
= GetBlockDynamicHuffmanHeader(lcodes_counts, dcodes_counts)
dynamic_block_bitlen = GetDynamicHuffmanBlockSize(lcodes, dcodes, HCLEN,
rle_codes_huffman_bitlens,
rle_deflate_codes,
lcodes_huffman_bitlens,
dcodes_huffman_bitlens)
fixed_block_bitlen = GetFixedHuffmanBlockSize(lcodes, dcodes)
end
store_block_bitlen = GetStoreBlockSize(block_start, block_end, total_bitlen)
local min_bitlen = store_block_bitlen
min_bitlen = (fixed_block_bitlen and fixed_block_bitlen < min_bitlen) and fixed_block_bitlen or min_bitlen
min_bitlen = (dynamic_block_bitlen and dynamic_block_bitlen < min_bitlen) and dynamic_block_bitlen or min_bitlen
CompressDynamicHuffmanBlock(WriteBits, is_last_block, lcodes, lextra_bits,
dcodes, dextra_bits, HLIT, HDIST, HCLEN,
rle_codes_huffman_bitlens,
rle_codes_huffman_codes, rle_deflate_codes,
rle_extra_bits, lcodes_huffman_bitlens,
lcodes_huffman_codes, dcodes_huffman_bitlens,
dcodes_huffman_codes)
total_bitlen = total_bitlen + dynamic_block_bitlen
if is_last_block then
bitlen_written = FlushWriter(_FLUSH_MODE_NO_FLUSH)
else
bitlen_written = FlushWriter(_FLUSH_MODE_MEMORY_CLEANUP)
end
assert(bitlen_written == total_bitlen)
-- Memory clean up, so memory consumption does not always grow linearly,
-- even if input string is > 64K.
-- Not a very efficient operation, but this operation won't happen
-- when the input data size is less than 64K.
if not is_last_block then
local j
j = 1
for i = block_end - 32767, block_end do
string_table[j] = string_table[i - offset]
j = j + 1
end
for k, t in pairs(hash_tables) do
local tSize = #t
if tSize > 0 and block_end + 1 - t[1] > 32768 then
if tSize == 1 then
hash_tables[k] = nil
else
local new = {}
local newSize = 0
for i = 2, tSize do
j = t[i]
if block_end + 1 - j <= 32768 then
newSize = newSize + 1
new[newSize] = j
end
end
hash_tables[k] = new
end
end
end
end
end
end
--[[ --------------------------------------------------------------------------
Decompress code
--]] --------------------------------------------------------------------------
--[[
Create a reader to easily reader stuffs as the unit of bits.
Return values:
1. ReadBits(bitlen)
2. ReadBytes(bytelen, buffer, buffer_size)
3. Decode(huffman_bitlen_count, huffman_symbol, min_bitlen)
4. ReaderBitlenLeft()
5. SkipToByteBoundary()
--]]
local function CreateReader(input_string)
local input = input_string
local input_strlen = #input_string
local input_next_byte_pos = 1
local cache_bitlen = 0
local cache = 0
-- Read some bits.
-- To improve speed, this function does not
-- check if the input has been exhausted.
-- Use ReaderBitlenLeft() < 0 to check it.
-- @param bitlen the number of bits to read
-- @return the data is read.
local function ReadBits(bitlen)
local rshift_mask = _pow2[bitlen]
local code = 0
if bitlen <= cache_bitlen then
code = (cache&(rshift_mask-1))
cache = (cache - code) >> bitlen
cache_bitlen = cache_bitlen - bitlen
else -- Whether input has been exhausted is not checked.
local lshift_mask = _pow2[cache_bitlen]
local byte1, byte2 = string_byte(input, input_next_byte_pos, input_next_byte_pos + 1)
cache = cache + ((byte1 or 0) + (byte2 or 0) * 256) * lshift_mask
input_next_byte_pos = input_next_byte_pos + 2
cache_bitlen = cache_bitlen + 16 - bitlen
code = (cache&(rshift_mask-1))
cache = (cache - code) >> bitlen
end
return code
end
-- Read some bytes from the reader.
-- Assume reader is on the byte boundary.
-- @param bytelen The number of bytes to be read.
-- @param buffer The byte read will be stored into this buffer.
-- @param buffer_size The buffer will be modified starting from
-- buffer[buffer_size+1], ending at buffer[buffer_size+bytelen-1]
-- @return the new buffer_size
local function ReadBytes(bytelen, buffer, buffer_size)
assert((cache_bitlen&7) == 0)
local byte_from_cache = ((cache_bitlen >> 3) < bytelen) and (cache_bitlen >> 3) or bytelen
for _ = 1, byte_from_cache do
local byte = ((cache&255))
buffer_size = buffer_size + 1
buffer[buffer_size] = string_char(byte)
cache = (cache - byte) >> 8
end
cache_bitlen = cache_bitlen - byte_from_cache * 8
bytelen = bytelen - byte_from_cache
if (input_strlen - input_next_byte_pos - bytelen + 1) * 8 + cache_bitlen < 0 then
return -1 -- out of input
end
for i = input_next_byte_pos, input_next_byte_pos + bytelen - 1 do
buffer_size = buffer_size + 1
buffer[buffer_size] = string_sub(input, i, i)
end
input_next_byte_pos = input_next_byte_pos + bytelen
return buffer_size
end
-- Decode huffman code
-- To improve speed, this function does not check
-- if the input has been exhausted.
-- Use ReaderBitlenLeft() < 0 to check it.
-- Credits for Mark Adler. This code is from puff:Decode()
-- @see puff:Decode(...)
-- @param huffman_bitlen_count
-- @param huffman_symbol
-- @param min_bitlen The minimum huffman bit length of all symbols
-- @return The decoded deflate code.
-- Negative value is returned if decoding fails.
local function Decode(huffman_bitlen_counts, huffman_symbols, min_bitlen)
local code = 0
local first = 0
local index = 0
local count
if min_bitlen > 0 then
if cache_bitlen < 15 and input then
local lshift_mask = _pow2[cache_bitlen]
local byte1, byte2 = string_byte(input, input_next_byte_pos, input_next_byte_pos + 1)
-- This requires lua number to be at least double ()
cache = cache + ((byte1 or 0) + (byte2 or 0) * 256) * lshift_mask
input_next_byte_pos = input_next_byte_pos + 2
cache_bitlen = cache_bitlen + 16
end
local rshift_mask = _pow2[min_bitlen]
cache_bitlen = cache_bitlen - min_bitlen
code = (cache&(rshift_mask-1))
cache = (cache - code) >> min_bitlen
-- Reverse the bits
code = _reverse_bits_tbl[min_bitlen][code]
count = huffman_bitlen_counts[min_bitlen]
if code < count then return huffman_symbols[code] end
index = count
first = count * 2
code = code * 2
end
for bitlen = min_bitlen + 1, 15 do
local bit
bit = (cache&1)
cache = (cache - bit) >> 1
cache_bitlen = cache_bitlen - 1
code = (bit == 1) and (code + 1 - (code&1)) or code
count = huffman_bitlen_counts[bitlen] or 0
local diff = code - first
if diff < count then return huffman_symbols[index + diff] end
index = index + count
first = first + count
first = first * 2
code = code * 2
end
-- invalid literal,length or distance code
-- in fixed or dynamic block (run out of code)
return -10
end
local function ReaderBitlenLeft()
return (input_strlen - input_next_byte_pos + 1) * 8 + cache_bitlen
end
local function SkipToByteBoundary()
local skipped_bitlen = (cache_bitlen&7)
local rshift_mask = _pow2[skipped_bitlen]
cache_bitlen = cache_bitlen - skipped_bitlen
cache = (cache - (cache&(rshift_mask-1))) >> skipped_bitlen
end
return ReadBits, ReadBytes, Decode, ReaderBitlenLeft, SkipToByteBoundary
end
-- Create a deflate state, so I can pass in less arguments to functions.
-- @param str the whole string to be decompressed.
-- @param dictionary The preset dictionary. nil if not provided.
-- This dictionary should be produced by LibDeflate:CreateDictionary(str)
-- @return The decomrpess state.
local function CreateDecompressState(str)
local ReadBits, ReadBytes, Decode, ReaderBitlenLeft, SkipToByteBoundary = CreateReader(str)
local state = {
ReadBits = ReadBits,
ReadBytes = ReadBytes,
Decode = Decode,
ReaderBitlenLeft = ReaderBitlenLeft,
SkipToByteBoundary = SkipToByteBoundary,
buffer_size = 0,
buffer = {},
result_buffer = {},
}
return state
end
-- Get the stuffs needed to decode huffman codes
-- @see puff.c:construct(...)
-- @param huffman_bitlen The huffman bit length of the huffman codes.
-- @param max_symbol The maximum symbol
-- @param max_bitlen The min huffman bit length of all codes
-- @return zero or positive for success, negative for failure.
-- @return The count of each huffman bit length.
-- @return A table to convert huffman codes to deflate codes.
-- @return The minimum huffman bit length.
local function GetHuffmanForDecode(huffman_bitlens, max_symbol, max_bitlen)
local huffman_bitlen_counts = {}
local min_bitlen = max_bitlen
for symbol = 0, max_symbol do
local bitlen = huffman_bitlens[symbol] or 0
min_bitlen = (bitlen > 0 and bitlen < min_bitlen) and bitlen or min_bitlen
huffman_bitlen_counts[bitlen] = (huffman_bitlen_counts[bitlen] or 0) + 1
end
if huffman_bitlen_counts[0] == max_symbol + 1 then -- No Codes
return 0, huffman_bitlen_counts, {}, 0 -- Complete, but decode will fail
end
local left = 1
for len = 1, max_bitlen do
left = left * 2
left = left - (huffman_bitlen_counts[len] or 0)
if left < 0 then
return left -- Over-subscribed, return negative
end
end
-- Generate offsets info symbol table for each length for sorting
local offsets = {}
offsets[1] = 0
for len = 1, max_bitlen - 1 do
offsets[len + 1] = offsets[len] + (huffman_bitlen_counts[len] or 0)
end
local huffman_symbols = {}
for symbol = 0, max_symbol do
local bitlen = huffman_bitlens[symbol] or 0
if bitlen ~= 0 then
local offset = offsets[bitlen]
huffman_symbols[offset] = symbol
offsets[bitlen] = offsets[bitlen] + 1
end
end
-- Return zero for complete set, positive for incomplete set.
return left, huffman_bitlen_counts, huffman_symbols, min_bitlen
end
-- Decode a fixed or dynamic huffman blocks, excluding last block identifier
-- and block type identifer.
-- @see puff.c:codes()
-- @param state decompression state that will be modified by this function.
-- @see CreateDecompressState
-- @param ... Read the source code
-- @return 0 on success, other value on failure.
local function DecodeUntilEndOfBlock(state, lcodes_huffman_bitlens,
lcodes_huffman_symbols,
lcodes_huffman_min_bitlen,
dcodes_huffman_bitlens,
dcodes_huffman_symbols,
dcodes_huffman_min_bitlen)
local buffer, buffer_size, ReadBits, Decode, ReaderBitlenLeft, result_buffer =
state.buffer, state.buffer_size, state.ReadBits, state.Decode,
state.ReaderBitlenLeft, state.result_buffer
local dict_string_table
local dict_strlen
local buffer_end = 1
repeat
local symbol = Decode(lcodes_huffman_bitlens, lcodes_huffman_symbols,
lcodes_huffman_min_bitlen)
if symbol < 0 or symbol > 285 then
-- invalid literal,length or distance code in fixed or dynamic block
return -10
elseif symbol < 256 then -- Literal
buffer_size = buffer_size + 1
buffer[buffer_size] = _byte_to_char[symbol]
elseif symbol > 256 then -- Length code
symbol = symbol - 256
local bitlen = _literal_deflate_code_to_base_len[symbol]
bitlen = (symbol >= 8) and (bitlen + ReadBits(_literal_deflate_code_to_extra_bitlen[symbol])) or bitlen
symbol = Decode(dcodes_huffman_bitlens, dcodes_huffman_symbols, dcodes_huffman_min_bitlen)
if symbol < 0 or symbol > 29 then
-- invalid literal,length or distance code in fixed or dynamic block
return -10
end
local dist = _dist_deflate_code_to_base_dist[symbol]
dist = (dist > 4) and (dist + ReadBits(_dist_deflate_code_to_extra_bitlen[symbol])) or dist
local char_buffer_index = buffer_size - dist + 1
if char_buffer_index < buffer_end then
-- distance is too far back in fixed or dynamic block
return -11
end
if char_buffer_index >= -257 then
for _ = 1, bitlen do
buffer_size = buffer_size + 1
buffer[buffer_size] = buffer[char_buffer_index]
char_buffer_index = char_buffer_index + 1
end
else
char_buffer_index = dict_strlen + char_buffer_index
for _ = 1, bitlen do
buffer_size = buffer_size + 1
buffer[buffer_size] = _byte_to_char[dict_string_table[char_buffer_index]]
char_buffer_index = char_buffer_index + 1
end
end
end
if ReaderBitlenLeft() < 0 then
return 2 -- available inflate data did not terminate
end
if buffer_size >= 65536 then
result_buffer[#result_buffer + 1] = table_concat(buffer, "", 1, 32768)
for i = 32769, buffer_size do buffer[i - 32768] = buffer[i] end
buffer_size = buffer_size - 32768
buffer[buffer_size + 1] = nil
-- NOTE: buffer[32769..end] and buffer[-257..0] are not cleared.
-- This is why "buffer_size" variable is needed.
end
until symbol == 256
state.buffer_size = buffer_size
return 0
end
-- Decompress a store block
-- @param state decompression state that will be modified by this function.
-- @return 0 if succeeds, other value if fails.
local function DecompressStoreBlock(state)
local buffer, buffer_size, ReadBits, ReadBytes, ReaderBitlenLeft,
SkipToByteBoundary, result_buffer = state.buffer, state.buffer_size,
state.ReadBits, state.ReadBytes,
state.ReaderBitlenLeft,
state.SkipToByteBoundary,
state.result_buffer
SkipToByteBoundary()
local bytelen = ReadBits(16)
if ReaderBitlenLeft() < 0 then
return 2 -- available inflate data did not terminate
end
local bytelenComp = ReadBits(16)
if ReaderBitlenLeft() < 0 then
return 2 -- available inflate data did not terminate
end
if (bytelen&255) + (bytelenComp&255) ~= 255 then
return -2 -- Not one's complement
end
if ((bytelen - (bytelen&255)) >> 8) + ((bytelenComp - (bytelenComp&255)) >> 8) ~= 255 then
return -2 -- Not one's complement
end
-- Note that ReadBytes will skip to the next byte boundary first.
buffer_size = ReadBytes(bytelen, buffer, buffer_size)
if buffer_size < 0 then
return 2 -- available inflate data did not terminate
end
-- memory clean up when there are enough bytes in the buffer.
if buffer_size >= 65536 then
result_buffer[#result_buffer + 1] = table_concat(buffer, "", 1, 32768)
for i = 32769, buffer_size do buffer[i - 32768] = buffer[i] end
buffer_size = buffer_size - 32768
buffer[buffer_size + 1] = nil
end
state.buffer_size = buffer_size
return 0
end
-- Decompress a fixed block
-- @param state decompression state that will be modified by this function.
-- @return 0 if succeeds other value if fails.
local function DecompressFixBlock(state)
return DecodeUntilEndOfBlock(state, _fix_block_literal_huffman_bitlen_count,
_fix_block_literal_huffman_to_deflate_code, 7,
_fix_block_dist_huffman_bitlen_count,
_fix_block_dist_huffman_to_deflate_code, 5)
end
-- Decompress a dynamic block
-- @param state decompression state that will be modified by this function.
-- @return 0 if success, other value if fails.
local function DecompressDynamicBlock(state)
local ReadBits, Decode = state.ReadBits, state.Decode
local nlen = ReadBits(5) + 257
local ndist = ReadBits(5) + 1
local ncode = ReadBits(4) + 4
if nlen > 286 or ndist > 30 then
-- dynamic block code description: too many length or distance codes
return -3
end
local rle_codes_huffman_bitlens = {}
for i = 1, ncode do
rle_codes_huffman_bitlens[_rle_codes_huffman_bitlen_order[i]] = ReadBits(3)
end
local rle_codes_err, rle_codes_huffman_bitlen_counts, rle_codes_huffman_symbols, rle_codes_huffman_min_bitlen = GetHuffmanForDecode(rle_codes_huffman_bitlens, 18, 7)
if rle_codes_err ~= 0 then -- Require complete code set here
-- dynamic block code description: code lengths codes incomplete
return -4
end
local lcodes_huffman_bitlens = {}
local dcodes_huffman_bitlens = {}
-- Read length,literal and distance code length tables
local index = 0
while index < nlen + ndist do
local symbol -- Decoded value
local bitlen -- Last length to repeat
symbol = Decode(rle_codes_huffman_bitlen_counts, rle_codes_huffman_symbols, rle_codes_huffman_min_bitlen)
if symbol < 0 then
return symbol -- Invalid symbol
elseif symbol < 16 then
if index < nlen then
lcodes_huffman_bitlens[index] = symbol
else
dcodes_huffman_bitlens[index - nlen] = symbol
end
index = index + 1
else
bitlen = 0
if symbol == 16 then
if index == 0 then
-- dynamic block code description: repeat lengths
-- with no first length
return -5
end
if index - 1 < nlen then
bitlen = lcodes_huffman_bitlens[index - 1]
else
bitlen = dcodes_huffman_bitlens[index - nlen - 1]
end
symbol = 3 + ReadBits(2)
elseif symbol == 17 then -- Repeat zero 3..10 times
symbol = 3 + ReadBits(3)
else -- == 18, repeat zero 11.138 times
symbol = 11 + ReadBits(7)
end
if index + symbol > nlen + ndist then
-- dynamic block code description:
-- repeat more than specified lengths
return -6
end
while symbol > 0 do -- Repeat last or zero symbol times
symbol = symbol - 1
if index < nlen then
lcodes_huffman_bitlens[index] = bitlen
else
dcodes_huffman_bitlens[index - nlen] = bitlen
end
index = index + 1
end
end
end
if (lcodes_huffman_bitlens[256] or 0) == 0 then
-- dynamic block code description: missing end-of-block code
return -9
end
local lcodes_err, lcodes_huffman_bitlen_counts, lcodes_huffman_symbols, lcodes_huffman_min_bitlen = GetHuffmanForDecode(lcodes_huffman_bitlens, nlen - 1, 15)
-- dynamic block code description: invalid literal,length code lengths,
-- Incomplete code ok only for single length 1 code
if (lcodes_err ~= 0 and (lcodes_err < 0 or nlen ~= (lcodes_huffman_bitlen_counts[0] or 0) + (lcodes_huffman_bitlen_counts[1] or 0))) then
return -7
end
local dcodes_err, dcodes_huffman_bitlen_counts, dcodes_huffman_symbols, dcodes_huffman_min_bitlen = GetHuffmanForDecode(dcodes_huffman_bitlens, ndist - 1, 15)
-- dynamic block code description: invalid distance code lengths,
-- Incomplete code ok only for single length 1 code
if (dcodes_err ~= 0 and (dcodes_err < 0 or ndist ~= (dcodes_huffman_bitlen_counts[0] or 0) + (dcodes_huffman_bitlen_counts[1] or 0))) then
return -8
end
-- Build buffman table for literal,length codes
return DecodeUntilEndOfBlock(state, lcodes_huffman_bitlen_counts,
lcodes_huffman_symbols,
lcodes_huffman_min_bitlen,
dcodes_huffman_bitlen_counts,
dcodes_huffman_symbols, dcodes_huffman_min_bitlen)
end
-- Decompress a deflate stream
-- @param state: a decompression state
-- @return the decompressed string if succeeds. nil if fails.
local function Inflate(state)
local ReadBits = state.ReadBits
local is_last_block
while not is_last_block do
is_last_block = (ReadBits(1) == 1)
local block_type = ReadBits(2)
local status
if block_type == 0 then
status = DecompressStoreBlock(state)
elseif block_type == 1 then
status = DecompressFixBlock(state)
elseif block_type == 2 then
status = DecompressDynamicBlock(state)
else
return nil, -1 -- invalid block type (type == 3)
end
if status ~= 0 then return nil, status end
end
state.result_buffer[#state.result_buffer + 1] = table_concat(state.buffer, "", 1, state.buffer_size)
local result = table_concat(state.result_buffer)
return result
end
function LibDeflate.DecompressDeflate(str)
local state = CreateDecompressState(str)
local result, status = Inflate(state)
if not result then return nil, status end
local bitlen_left = state.ReaderBitlenLeft()
local bytelen_left = (bitlen_left - (bitlen_left&7)) >> 3
return result, bytelen_left
end
function LibDeflate.CompressDeflate(str)
local WriteBits, WriteString, FlushWriter = CreateWriter()
Deflate(WriteBits, WriteString, FlushWriter, str)
local total_bitlen, result = FlushWriter(_FLUSH_MODE_OUTPUT)
local padding_bitlen = ((8 - (total_bitlen&7))&7)
return result, padding_bitlen
end
function LibDeflate.InitCompressor()
for i = 0, 255 do _byte_to_char[i] = string_char(i) end
local pow = 1
for i = 0, 24 do
_pow2[i] = pow
pow = pow * 2
end
for i = 1, 9 do
_reverse_bits_tbl[i] = {}
for j = 0, _pow2[i + 1] - 1 do
local reverse = 0
local value = j
for _ = 1, i do
reverse = reverse - (reverse&1) + ((((reverse&1) == 1) or ((value&1)) == 1) and 1 or 0)
value = (value - (value&1)) >> 1
reverse = reverse << 1
end
_reverse_bits_tbl[i][j] = (reverse - (reverse&1)) >> 1
end
end
-- The source code is written according to the pattern in the numbers
-- in RFC1951 Page10.
local a = 18
local b = 16
local c = 265
local bitlen = 1
for len = 3, 258 do
if len <= 10 then
_length_to_deflate_code[len] = len + 254
_length_to_deflate_extra_bitlen[len] = 0
elseif len == 258 then
_length_to_deflate_code[len] = 285
_length_to_deflate_extra_bitlen[len] = 0
else
if len > a then
a = a + b
b = b * 2
c = c + 4
bitlen = bitlen + 1
end
local t = len - a - 1 + (b >> 1)
_length_to_deflate_code[len] = (t - (t&((b >> 3)-1))) // (b >> 3) + c
_length_to_deflate_extra_bitlen[len] = bitlen
_length_to_deflate_extra_bits[len] = (t&((b >> 3)-1))
end
end
-- The source code is written according to the pattern in the numbers
-- in RFC1951 Page11.
_dist256_to_deflate_code[1] = 0
_dist256_to_deflate_code[2] = 1
_dist256_to_deflate_extra_bitlen[1] = 0
_dist256_to_deflate_extra_bitlen[2] = 0
local a = 3
local b = 4
local code = 2
local bitlen = 0
for dist = 3, 256 do
if dist > b then
a = a * 2
b = b * 2
code = code + 2
bitlen = bitlen + 1
end
_dist256_to_deflate_code[dist] = (dist <= a) and code or (code + 1)
_dist256_to_deflate_extra_bitlen[dist] = (bitlen < 0) and 0 or bitlen
if b >= 8 then
_dist256_to_deflate_extra_bits[dist] = ((dist - (b >> 1) - 1)&((b >> 2)-1))
end
end
_fix_block_literal_huffman_bitlen = {}
for sym = 0, 143 do _fix_block_literal_huffman_bitlen[sym] = 8 end
for sym = 144, 255 do _fix_block_literal_huffman_bitlen[sym] = 9 end
for sym = 256, 279 do _fix_block_literal_huffman_bitlen[sym] = 7 end
for sym = 280, 287 do _fix_block_literal_huffman_bitlen[sym] = 8 end
_fix_block_dist_huffman_bitlen = {}
for dist = 0, 31 do _fix_block_dist_huffman_bitlen[dist] = 5 end
local status
status, _fix_block_literal_huffman_bitlen_count, _fix_block_literal_huffman_to_deflate_code = GetHuffmanForDecode(_fix_block_literal_huffman_bitlen, 287, 9)
assert(status == 0)
status, _fix_block_dist_huffman_bitlen_count, _fix_block_dist_huffman_to_deflate_code = GetHuffmanForDecode(_fix_block_dist_huffman_bitlen, 31, 5)
assert(status == 0)
_fix_block_literal_huffman_code = GetHuffmanCodeFromBitlen(_fix_block_literal_huffman_bitlen_count, _fix_block_literal_huffman_bitlen, 287, 9)
_fix_block_dist_huffman_code = GetHuffmanCodeFromBitlen(_fix_block_dist_huffman_bitlen_count, _fix_block_dist_huffman_bitlen, 31, 5)
end
OnInit.map(function()
LibDeflate.InitCompressor()
end)
end
if Debug then Debug.endFile() end