From ae3ae644dad23687baad00ff44433edb27fe7f0b Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Sat, 18 Sep 2021 17:55:43 -0400 Subject: [PATCH 1/4] Move pointer conversions to Base.unsafe_convert --- src/libzstd.jl | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/src/libzstd.jl b/src/libzstd.jl index 9ec26ae..06a0b2e 100644 --- a/src/libzstd.jl +++ b/src/libzstd.jl @@ -18,12 +18,14 @@ const MAX_CLEVEL = max_clevel() const InBuffer = LibZstd.ZSTD_inBuffer InBuffer() = InBuffer(C_NULL, 0, 0) +Base.unsafe_convert(::Type{Ptr{InBuffer}}, buffer::InBuffer) = Ptr{InBuffer}(pointer_from_objref(buffer)) const OutBuffer = LibZstd.ZSTD_outBuffer OutBuffer() = OutBuffer(C_NULL, 0, 0) +Base.unsafe_convert(::Type{Ptr{OutBuffer}}, buffer::OutBuffer) = Ptr{OutBuffer}(pointer_from_objref(buffer)) # ZSTD_CStream mutable struct CStream - ptr::Ptr{Cvoid} + ptr::Ptr{LibZstd.ZSTD_CStream} ibuffer::InBuffer obuffer::OutBuffer @@ -36,14 +38,18 @@ mutable struct CStream end end +Base.unsafe_convert(::Type{Ptr{LibZstd.ZSTD_CStream}}, cstream::CStream) = cstream.ptr +Base.unsafe_convert(::Type{Ptr{InBuffer}}, cstream::CStream) = Base.unsafe_convert(Ptr{InBuffer}, cstream.ibuffer) +Base.unsafe_convert(::Type{Ptr{OutBuffer}}, cstream::CStream) = Base.unsafe_convert(Ptr{OutBuffer}, cstream.obuffer) + function initialize!(cstream::CStream, level::Integer) - return LibZstd.ZSTD_initCStream(cstream.ptr, level) + return LibZstd.ZSTD_initCStream(cstream, level) end function reset!(cstream::CStream, srcsize::Integer) # ZSTD_resetCStream is deprecated # https://github.com/facebook/zstd/blob/9d2a45a705e22ad4817b41442949cd0f78597154/lib/zstd.h#L2253-L2272 - res = LibZstd.ZSTD_CCtx_reset(cstream.ptr, LibZstd.ZSTD_reset_session_only) + res = LibZstd.ZSTD_CCtx_reset(cstream, LibZstd.ZSTD_reset_session_only) if iserror(res) return res end @@ -54,26 +60,26 @@ function reset!(cstream::CStream, srcsize::Integer) # explicitly specified. 
srcsize = ZSTD_CONTENTSIZE_UNKNOWN end - return LibZstd.ZSTD_CCtx_setPledgedSrcSize(cstream.ptr, srcsize) + return LibZstd.ZSTD_CCtx_setPledgedSrcSize(cstream, srcsize) #return ccall((:ZSTD_resetCStream, libzstd), Csize_t, (Ptr{Cvoid}, Culonglong), cstream.ptr, srcsize) end function compress!(cstream::CStream) - return LibZstd.ZSTD_compressStream(cstream.ptr, pointer_from_objref(cstream.obuffer), pointer_from_objref(cstream.ibuffer)) + return LibZstd.ZSTD_compressStream(cstream, cstream, cstream) end function finish!(cstream::CStream) - return LibZstd.ZSTD_endStream(cstream.ptr, pointer_from_objref(cstream.obuffer)) + return LibZstd.ZSTD_endStream(cstream, cstream) end function free!(cstream::CStream) - return LibZstd.ZSTD_freeCStream(cstream.ptr) + return LibZstd.ZSTD_freeCStream(cstream) end # ZSTD_DStream mutable struct DStream - ptr::Ptr{Cvoid} + ptr::Ptr{LibZstd.ZSTD_DStream} ibuffer::InBuffer obuffer::OutBuffer @@ -85,23 +91,26 @@ mutable struct DStream return new(ptr, InBuffer(), OutBuffer()) end end +Base.unsafe_convert(::Type{Ptr{LibZstd.ZSTD_DStream}}, dstream::DStream) = dstream.ptr +Base.unsafe_convert(::Type{Ptr{InBuffer}}, dstream::DStream) = Ptr{InBuffer}(Base.unsafe_convert(Ptr{InBuffer}, dstream.ibuffer)) +Base.unsafe_convert(::Type{Ptr{OutBuffer}}, dstream::DStream) = Ptr{OutBuffer}(Base.unsafe_convert(Ptr{OutBuffer}, dstream.obuffer)) function initialize!(dstream::DStream) - return LibZstd.ZSTD_initDStream(dstream.ptr) + return LibZstd.ZSTD_initDStream(dstream) end function reset!(dstream::DStream) # LibZstd.ZSTD_resetDStream is deprecated # https://github.com/facebook/zstd/blob/9d2a45a705e22ad4817b41442949cd0f78597154/lib/zstd.h#L2332-L2339 - return LibZstd.ZSTD_DCtx_reset(dstream.ptr, LibZstd.ZSTD_reset_session_only) + return LibZstd.ZSTD_DCtx_reset(dstream, LibZstd.ZSTD_reset_session_only) end function decompress!(dstream::DStream) - return LibZstd.ZSTD_decompressStream(dstream.ptr, pointer_from_objref(dstream.obuffer), 
pointer_from_objref(dstream.ibuffer)) + return LibZstd.ZSTD_decompressStream(dstream, dstream, dstream) end function free!(dstream::DStream) - return LibZstd.ZSTD_freeDStream(dstream.ptr) + return LibZstd.ZSTD_freeDStream(dstream) end From 583d955b14edab8531604b543031e1b510c7a88f Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Mon, 20 Sep 2021 05:57:14 -0400 Subject: [PATCH 2/4] Explicitly refer to out and in buffers --- src/libzstd.jl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/libzstd.jl b/src/libzstd.jl index 06a0b2e..1f97139 100644 --- a/src/libzstd.jl +++ b/src/libzstd.jl @@ -66,11 +66,11 @@ function reset!(cstream::CStream, srcsize::Integer) end function compress!(cstream::CStream) - return LibZstd.ZSTD_compressStream(cstream, cstream, cstream) + return LibZstd.ZSTD_compressStream(cstream, cstream.obuffer, cstream.ibuffer) end function finish!(cstream::CStream) - return LibZstd.ZSTD_endStream(cstream, cstream) + return LibZstd.ZSTD_endStream(cstream, cstream.obuffer) end function free!(cstream::CStream) @@ -106,7 +106,7 @@ function reset!(dstream::DStream) end function decompress!(dstream::DStream) - return LibZstd.ZSTD_decompressStream(dstream, dstream, dstream) + return LibZstd.ZSTD_decompressStream(dstream, dstream.obuffer, dstream.ibuffer) end function free!(dstream::DStream) From c4a1270c1862aa1d670ee8c4189c317b98b434ee Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Wed, 29 Sep 2021 17:24:57 -0400 Subject: [PATCH 3/4] Normalize path separators in include, add comments --- src/CodecZstd.jl | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/CodecZstd.jl b/src/CodecZstd.jl index 85a0ec3..157af53 100644 --- a/src/CodecZstd.jl +++ b/src/CodecZstd.jl @@ -17,7 +17,9 @@ import TranscodingStreams: using Zstd_jll -include(joinpath("libzstd","LibZstd.jl")) +# Module containing directly wrapped ccalls generated by CLang.jl +include("libzstd/LibZstd.jl") +# Library functions that have a Julian 
interface. This file originally preceded the above module include("libzstd.jl") include("compression.jl") include("decompression.jl") From bc53ff4c145ceb75142a8deefc926244e9f67139 Mon Sep 17 00:00:00 2001 From: Mark Kittisopikul Date: Wed, 29 Sep 2021 23:25:02 -0400 Subject: [PATCH 4/4] Add Julian API for ZSTD dictionaries and parameters --- src/libzstd.jl | 192 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) diff --git a/src/libzstd.jl b/src/libzstd.jl index 1f97139..46d222a 100644 --- a/src/libzstd.jl +++ b/src/libzstd.jl @@ -9,6 +9,16 @@ function zstderror(stream, code::Csize_t) ptr = LibZstd.ZSTD_getErrorName(code) error("zstd error: ", unsafe_string(ptr)) end +function zstderror(code::Csize_t) + ptr = LibZstd.ZSTD_getErrorName(code) + error("zstd error: ", unsafe_string(ptr)) +end +function checkerror(code::Csize_t) + if iserror(code) + zstderror(code) + end + code +end function max_clevel() return LibZstd.ZSTD_maxCLevel() @@ -113,6 +123,188 @@ function free!(dstream::DStream) return LibZstd.ZSTD_freeDStream(dstream) end +# Dictionary +# ========== + +""" + CodecZstd.Dictionary(buffer::AbstractVector{UInt8}, samplesBuffers::AbstractVector{UInt8}, sampleSizes::Vector{Csize_t}) + CodecZstd.Dictionary(samplesBuffers::AbstractVector{UInt8}, sampleSizes::Vector{Csize_t}, bufferCapacity=1024*100) + CodecZstd.Dictionary(buffer::AbstractVector{UInt8}, samples::AbstractVector{UInt8}...) + CodecZstd.Dictionary(bufferCapacity::Integer, samples::AbstractVector{UInt8}...) + +Pre-trained dictionary used to aid ZSTD in the compression of small files. +`buffer` is a pre-allocated UInt8 array used to contain the dictionary. +`samplesBuffers` is a single UInt8 array that contains the samples of size specified by `sampleSizes` +`bufferCapacity` specifies the byte size of the buffer to create. +`samples` is a variable number of UInt8 arrays that will be used as samples. They will be concatenated together to form `samplesBuffers`.
+ +Use with `loadDictionary(stream, dictionary)` to load the dictionary into a compression or decompression stream (CStream or DStream). +""" +mutable struct Dictionary{B <: AbstractVector{UInt8}} + buffer::B +end +function Dictionary(buffer::AbstractVector{UInt8}, samplesBuffer::AbstractVector{UInt8}, samplesSizes::Vector{Csize_t}) + bufferSize = LibZstd.ZDICT_trainFromBuffer(buffer, sizeof(buffer), samplesBuffer, samplesSizes, length(samplesSizes)) + if LibZstd.ZDICT_isError(bufferSize) != 0 + throw(ErrorException("Error training Zstd dictionary. ZDICT: " * unsafe_string(LibZstd.ZDICT_getErrorName(bufferSize)))) + end + Dictionary(@view buffer[1:Int(bufferSize)]) +end +Dictionary(samplesBuffer::AbstractVector{UInt8}, samplesSizes::Vector{Csize_t}, bufferCapacity::Integer=1024*100) = + Dictionary(Vector{UInt8}(undef, bufferCapacity), samplesBuffer, samplesSizes) +Dictionary(buffer::AbstractVector{UInt8}, samples::AbstractVector{UInt8}...) = Dictionary(buffer, vcat(samples...), Csize_t[length(s) for s in samples]) +Dictionary(bufferCapacity::Integer, samples::AbstractVector{UInt8}...) = Dictionary(vcat(samples...), Csize_t[length(s) for s in samples], bufferCapacity) + +""" + CodecZstd.finalizeDictionary + + See documentation for ZDICT_finalizeDictionary +""" +function finalizeDictionary( + dstDictBuffer::AbstractVector{UInt8}, dictContent::AbstractVector{UInt8}, + samplesBuffer::AbstractVector{UInt8}, samplesSizes::Vector{Csize_t}, + parameters::LibZstd.ZDICT_params_t + ) + bufferSize = + LibZstd.ZDICT_finalizeDictionary(dstDictBuffer, length(dstDictBuffer), + dictContent, length(dictContent), + samplesBuffer, + samplesSizes, length(samplesSizes), + parameters + ) + if LibZstd.ZDICT_isError(bufferSize) != 0 + throw(ErrorException("Error finalizing Zstd dictionary.
ZDICT: " * unsafe_string(LibZstd.ZDICT_getErrorName(bufferSize)))) + end + Dictionary(@view dstDictBuffer[1:Int(bufferSize)]) +end + +Base.size(dict::Dictionary) = Int.(size(dict.buffer)) +Base.length(dict::Dictionary) = Int(length(dict.buffer)) +Base.pointer(dict::Dictionary) = pointer(dict.buffer) +Base.unsafe_convert(::Type{Ptr{Nothing}}, dict::Dictionary) = Ptr{Nothing}(pointer(dict)) +getHeaderSize(dict::Dictionary) = Int(LibZstd.ZDICT_getDictHeaderSize(dict, length(dict))) +getID(dict::Dictionary) = LibZstd.ZDICT_getDictID(dict, length(dict)) + +# CDict + +# This is actually just a box for an opaque pointer +mutable struct CDict + ptr::Ptr{LibZstd.ZSTD_CDict} + function CDict(ptr) + cdict = new(ptr) + finalizer(free!, cdict) + end +end +createCDict(dict::Dictionary, compressionLevel) = + LibZstd.ZSTD_createCDict(dict.buffer, length(dict), compressionLevel) +free!(ptr::Ptr{LibZstd.ZSTD_CDict}) = LibZstd.ZSTD_freeCDict(ptr) +free!(cdict::CDict) = free!(cdict.ptr) +Base.unsafe_convert(::Type{Ptr{LibZstd.ZSTD_CDict}}, cdict::CDict) = cdict.ptr + +function loadDictionary(cstream::CStream, dict::Dictionary) + checkerror( LibZstd.ZSTD_CCtx_loadDictionary(cstream, dict, length(dict)) ) +end +function loadDictionary(cstream::CStream, cdict::CDict) + checkerror( LibZstd.ZSTD_CCtx_refCDict(cstream, cdict) ) +end + +# DDict + +# This is actually just a box for an opaque pointer +mutable struct DDict + ptr::Ptr{LibZstd.ZSTD_DDict} + function DDict(ptr) + ddict = new(ptr) + finalizer(free!, ddict) + end +end +createDDict(dict::Dictionary) = + LibZstd.ZSTD_createDDict(dict.buffer, length(dict)) +free!(ptr::Ptr{LibZstd.ZSTD_DDict}) = LibZstd.ZSTD_freeDDict(ptr) +free!(ddict::DDict) = free!(ddict.ptr) +Base.unsafe_convert(::Type{Ptr{LibZstd.ZSTD_DDict}}, ddict::DDict) = ddict.ptr + +function loadDictionary(dstream::DStream, dict::Dictionary) + checkerror( LibZstd.ZSTD_DCtx_loadDictionary(dstream, dict, length(dict)) ) +end +function loadDictionary(dstream::DStream, ddict::DDict) + checkerror(
LibZstd.ZSTD_DCtx_refDDict(dstream, ddict) ) +end + +# Parameters +# ========== + +function setParameter(cstream::CStream, parameter::LibZstd.ZSTD_cParameter, value) + checkerror( LibZstd.ZSTD_CCtx_setParameter(cstream, parameter, value) ) +end +function getParameter(cstream::CStream, parameter::LibZstd.ZSTD_cParameter, out=Ref{Cint}()) + checkerror( LibZstd.ZSTD_CCtx_getParameter(cstream, parameter, out) ) + out[] +end + +""" + CStreamParameters + +An AbstractDict interface that allows retrieving and setting the parameters for a +compression stream +""" +struct CStreamParameters <: AbstractDict{LibZstd.ZSTD_cParameter, Cint} + cstream::CStream +end +Base.keys(params::CStreamParameters) = instances(LibZstd.ZSTD_cParameter) +Base.values(params::CStreamParameters) = (getParameter(params.cstream, param) for param in keys(params)) +Base.getindex(params::CStreamParameters, key::LibZstd.ZSTD_cParameter) = getParameter(params.cstream, key) +Base.setindex!(params::CStreamParameters, value, key::LibZstd.ZSTD_cParameter) = setParameter(params.cstream, key, value) +function Base.iterate(params::CStreamParameters, state=1) + ks = keys(params) + if state <= length(ks) + k = ks[state] + return (k => params[k], state+1) + else + return nothing + end +end +Base.length(::CStreamParameters) = length(instances(LibZstd.ZSTD_cParameter)) + +function getParameters(cstream::CStream) + CStreamParameters(cstream) +end +function setParameter(dstream::DStream, parameter::LibZstd.ZSTD_dParameter, value) + checkerror( LibZstd.ZSTD_DCtx_setParameter(dstream, parameter, value) ) +end +function getParameter(dstream::DStream, parameter::LibZstd.ZSTD_dParameter, out=Ref{Cint}()) + checkerror( LibZstd.ZSTD_DCtx_getParameter(dstream, parameter, out) ) + out[] +end + +""" + DStreamParameters + +An AbstractDict that allows retrieving and setting the parameters for a +decompression stream +""" +struct DStreamParameters <: AbstractDict{LibZstd.ZSTD_dParameter, Cint} + dstream::DStream +end
+Base.keys(params::DStreamParameters) = instances(LibZstd.ZSTD_dParameter) +Base.values(params::DStreamParameters) = (getParameter(params.dstream, param) for param in keys(params)) +Base.getindex(params::DStreamParameters, key::LibZstd.ZSTD_dParameter) = getParameter(params.dstream, key) +Base.setindex!(params::DStreamParameters, value, key::LibZstd.ZSTD_dParameter) = setParameter(params.dstream, key, value) +function Base.iterate(params::DStreamParameters, state=1) + ks = keys(params) + if state <= length(ks) + k = ks[state] + return (k => params[k], state+1) + else + return nothing + end +end +Base.length(::DStreamParameters) = length(instances(LibZstd.ZSTD_dParameter)) + +function getParameters(dstream::DStream) + DStreamParameters(dstream) +end + # Misc. functions # ---------------