-
Notifications
You must be signed in to change notification settings - Fork 143
Add bzip2, lz4, and zstd filters from HDF5Plugins.jl #875
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 5 commits
aa03f38
8e84992
6713689
297e51a
4d39a95
48b5334
5deb635
9b9430b
b349f57
cc92c6b
7303742
b3b4215
84c89fc
61fbb96
c4f68c3
351c7e1
c450eba
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,8 +1,15 @@ | ||
module H5Zblosc | ||
# port of https://github.com/Blosc/c-blosc/blob/3a668dcc9f61ad22b5c0a0ab45fe8dad387277fd/hdf5/blosc_filter.c (copyright 2010 Francesc Alted, license: MIT/expat) | ||
|
||
using ..API | ||
import Blosc | ||
import ..Filters: FILTERS, Filter, filterid, register_filter, FilterPipeline | ||
import ..Filters: filterid, filtername, encoder_present, decoder_present | ||
import ..Filters: set_local_func, set_local_cfunc, can_apply_func, can_apply_cfunc, filter_func, filter_cfunc, register_filter | ||
|
||
const FILTER_BLOSC = API.H5Z_filter_t(32001) # Filter ID registered with the HDF Group for Blosc | ||
export H5Z_FILTER_BLOSC, blosc_filter, BloscFilter | ||
|
||
const H5Z_FILTER_BLOSC = API.H5Z_filter_t(32001) # Filter ID registered with the HDF Group for Blosc | ||
const FILTER_BLOSC_VERSION = 2 | ||
const blosc_name = "blosc" | ||
|
||
|
@@ -12,7 +19,7 @@ function blosc_set_local(dcpl::API.hid_t, htype::API.hid_t, space::API.hid_t) | |
blosc_nelements = Ref{Csize_t}(length(blosc_values)) | ||
blosc_chunkdims = Vector{API.hsize_t}(undef,32) | ||
|
||
API.h5p_get_filter_by_id(dcpl, FILTER_BLOSC, blosc_flags, blosc_nelements, blosc_values, 0, C_NULL, C_NULL) | ||
API.h5p_get_filter_by_id(dcpl, H5Z_FILTER_BLOSC, blosc_flags, blosc_nelements, blosc_values, 0, C_NULL, C_NULL) | ||
flags = blosc_flags[] | ||
|
||
nelements = max(blosc_nelements[], 4) # First 4 slots reserved | ||
|
@@ -45,7 +52,7 @@ function blosc_set_local(dcpl::API.hid_t, htype::API.hid_t, space::API.hid_t) | |
blosc_values[3] = basetypesize | ||
blosc_values[4] = chunksize * htypesize # size of the chunk | ||
|
||
API.h5p_modify_filter(dcpl, FILTER_BLOSC, flags, nelements, blosc_values) | ||
API.h5p_modify_filter(dcpl, H5Z_FILTER_BLOSC, flags, nelements, blosc_values) | ||
|
||
return API.herr_t(1) | ||
end | ||
|
@@ -109,8 +116,8 @@ function register_blosc() | |
c_blosc_filter = @cfunction(blosc_filter, Csize_t, | ||
(Cuint, Csize_t, Ptr{Cuint}, Csize_t, | ||
Ptr{Csize_t}, Ptr{Ptr{Cvoid}})) | ||
API.h5z_register(API.H5Z_class_t(API.H5Z_CLASS_T_VERS, FILTER_BLOSC, 1, 1, pointer(blosc_name), C_NULL, c_blosc_set_local, c_blosc_filter)) | ||
|
||
API.h5z_register(API.H5Z_class_t(API.H5Z_CLASS_T_VERS, H5Z_FILTER_BLOSC, 1, 1, pointer(blosc_name), C_NULL, c_blosc_set_local, c_blosc_filter)) | ||
FILTERS[H5Z_FILTER_BLOSC] = BloscFilter | ||
return nothing | ||
end | ||
|
||
|
@@ -142,6 +149,17 @@ function BloscFilter(;level=5, shuffle=true, compressor="blosclz") | |
BloscFilter(0,0,0,0,level,shuffle,compcode) | ||
end | ||
|
||
# Filter interface for BloscFilter: identity, name, and the Julia/C callbacks.
function filterid(::Type{BloscFilter})
    return H5Z_FILTER_BLOSC
end

function filtername(::Type{BloscFilter})
    return blosc_name
end

function set_local_func(::Type{BloscFilter})
    return blosc_set_local
end

# NOTE(review): the C-callable pointers are spelled out per filter rather than
# relying on a generic fallback because every argument to `@cfunction` here is
# a constant, so no runtime closure is involved (see the "Closure cfunctions"
# section of the Julia manual).
function set_local_cfunc(::Type{BloscFilter})
    return @cfunction(blosc_set_local, API.herr_t, (API.hid_t, API.hid_t, API.hid_t))
end

function filter_func(::Type{BloscFilter})
    return blosc_filter
end

function filter_cfunc(::Type{BloscFilter})
    return @cfunction(
        blosc_filter,
        Csize_t,
        (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}})
    )
end
Comment on lines
+143
to
+145
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Is this still needed? Won't the generic one work? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Yes, this is useful since all the arguments here are constants and https://docs.julialang.org/en/v1/manual/calling-c-and-fortran-code/#Closure-cfunctions There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. ah right. maybe add a comment to that effect? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Added a comment to that effect in c450eba (still needs to be pushed to this branch) |
||
register_filter(::Type{BloscFilter}) = register_blosc() | ||
register_filter(::BloscFilter) = register_blosc() | ||
|
||
function Base.show(io::IO, blosc::BloscFilter) | ||
print(io, BloscFilter, | ||
"(level=", Int(blosc.level), | ||
|
@@ -150,9 +168,6 @@ function Base.show(io::IO, blosc::BloscFilter) | |
")") | ||
end | ||
|
||
filterid(::Type{BloscFilter}) = FILTER_BLOSC | ||
FILTERS[FILTER_BLOSC] = BloscFilter | ||
|
||
function Base.push!(f::FilterPipeline, blosc::BloscFilter) | ||
0 <= blosc.level <= 9 || throw(ArgumentError("blosc compression $(blosc.level) not in [0,9]")) | ||
ref = Ref(blosc) | ||
|
@@ -161,3 +176,8 @@ function Base.push!(f::FilterPipeline, blosc::BloscFilter) | |
end | ||
return f | ||
end | ||
|
||
precompile(register_filter, (BloscFilter,)) | ||
precompile(register_filter, (Type{BloscFilter},)) | ||
|
||
end # module H5Zblosc |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,208 @@ | ||
# The code below has been ported to Julia from the original C: | ||
# https://github.com/nexusformat/HDF5-External-Filter-Plugins/blob/master/BZIP2/src/H5Zbzip2.c | ||
# The filter function H5Z_filter_bzip2 was adopted from | ||
# PyTables http://www.pytables.org. | ||
# The plugin can be used with the HDF5 library version 1.8.11+ to read | ||
# HDF5 datasets compressed with bzip2 created by PyTables. | ||
# See H5Zbzip2_LICENSE.txt for the license. | ||
|
||
# The following copyright and license applies to the Julia port itself. | ||
# Copyright © 2021 Mark Kittisopikul, Howard Hughes Medical Institute | ||
# Licensed under MIT License, see LICENSE | ||
module H5Zbzip2 | ||
|
||
using ..API | ||
using CodecBzip2 | ||
import CodecBzip2: libbzip2 | ||
import ..Filters: FILTERS, Filter, filterid, register_filter, FilterPipeline | ||
import ..Filters: filterid, filtername, encoder_present, decoder_present | ||
import ..Filters: set_local_func, set_local_cfunc, can_apply_func, can_apply_cfunc, filter_func, filter_cfunc | ||
|
||
export H5Z_FILTER_BZIP2, H5Z_filter_bzip2, Bzip2Filter | ||
|
||
|
||
const H5Z_FILTER_BZIP2 = API.H5Z_filter_t(307) | ||
const bzip2_name = "HDF5 bzip2 filter; see http://www.hdfgroup.org/services/contributions.html" | ||
|
||
"""
    H5Z_filter_bzip2(flags, cd_nelmts, cd_values, nbytes, buf_size, buf) -> Csize_t

HDF5 filter callback that compresses or decompresses a chunk with bzip2.

When `flags` has `API.H5Z_FLAG_REVERSE` set, the `nbytes` bytes at `buf[]` are
decompressed; otherwise they are compressed. If `cd_nelmts > 0`, the first
element of `cd_values` is used as the bzip2 block size and must lie in `1:9`
(default `9`).

On success the input buffer is released with `Libc.free`, `buf[]` is replaced
by a freshly `malloc`ed output buffer, `buf_size[]` is set to the allocated
size of that buffer, and the number of valid output bytes is returned.

On any failure the error is logged, the partially built output buffer is
freed, `buf` and `buf_size` are left untouched, and `0` is returned. No
exception escapes this function.
"""
function H5Z_filter_bzip2(flags::Cuint, cd_nelmts::Csize_t,
                          cd_values::Ptr{Cuint}, nbytes::Csize_t,
                          buf_size::Ptr{Csize_t}, buf::Ptr{Ptr{Cvoid}})::Csize_t
    outbuf = C_NULL
    outdatalen = Cuint(0)

    try

        if flags & API.H5Z_FLAG_REVERSE != 0
            # Decompress

            # Initial guess for the decompressed size; grown on demand below.
            outbuflen = nbytes * 3 + 1
            outbuf = Libc.malloc(outbuflen)
            if outbuf == C_NULL
                error("H5Zbzip2: memory allocation failed for bzip2 decompression.")
            end

            stream = CodecBzip2.BZStream()
            # Just use default malloc and free
            stream.bzalloc = C_NULL
            stream.bzfree = C_NULL
            # BZ2_bzDecompressInit
            ret = CodecBzip2.decompress_init!(stream, 0, false)
            if ret != CodecBzip2.BZ_OK
                error("H5Zbzip2: bzip2 decompress start failed with error $ret.")
            end

            # From here on the stream owns library-side state, so it must be
            # ended on every exit path, including failures.
            try
                stream.next_out = outbuf
                stream.avail_out = outbuflen
                stream.next_in = unsafe_load(buf)
                stream.avail_in = nbytes

                cont = true

                while cont
                    # BZ2_bzDecompress
                    ret = CodecBzip2.decompress!(stream)
                    if ret < 0
                        error("H5Zbzip2: bzip2 decompression failed with error $ret.")
                    end
                    cont = ret != CodecBzip2.BZ_STREAM_END
                    if cont && stream.avail_out == 0
                        # Grow the output buffer
                        newbuflen = outbuflen * 2
                        newbuf = Libc.realloc(outbuf, newbuflen)
                        if newbuf == C_NULL
                            # outbuf is still the old, valid allocation here;
                            # the outer catch frees it.
                            error("H5Zbzip2: memory allocation failed for bzip2 decompression.")
                        end
                        # Continue writing right after the bytes already produced;
                        # doubling added exactly `outbuflen` bytes of free space.
                        stream.next_out = newbuf + outbuflen
                        stream.avail_out = outbuflen
                        outbuf = newbuf
                        outbuflen = newbuflen
                    end
                end

                outdatalen = stream.total_out_lo32
            catch
                # Best-effort release of the decompressor state before the
                # failure is handled by the outer catch.
                CodecBzip2.decompress_end!(stream)
                rethrow()
            end

            # BZ2_bzDecompressEnd
            ret = CodecBzip2.decompress_end!(stream)
            if ret != CodecBzip2.BZ_OK
                error("H5Zbzip2: bzip2 decompression end failed with error $ret.")
            end
        else
            # Compress data

            blockSize100k = 9

            # Get compression blocksize if present
            if cd_nelmts > 0
                blockSize100k = unsafe_load(cd_values)
                if blockSize100k < 1 || blockSize100k > 9
                    error("H5Zbzip2: Invalid compression blocksize: $blockSize100k")
                end
            end

            # Prepare the output buffer
            outbuflen = nbytes + nbytes ÷ 100 + 600 # worst case (bzip2 docs)
            outbuf = Libc.malloc(outbuflen)
            @debug "Allocated" outbuflen outbuf
            if outbuf == C_NULL
                error("H5Zbzip2: Memory allocation failed for bzip2 compression")
            end

            # Compress data; the Ref carries the capacity in and the number of
            # bytes written out.
            r_odatalen = Ref{Cuint}(outbuflen)
            ret = BZ2_bzBuffToBuffCompress(outbuf, r_odatalen, unsafe_load(buf), nbytes,
                                           blockSize100k, 0, 0)
            outdatalen = r_odatalen[]
            if ret != CodecBzip2.BZ_OK
                error("H5Zbzip2: bzip2 compression failed with error $ret.")
            end
        end # if flags & API.H5Z_FLAG_REVERSE != 0

        # Success: hand the new buffer to the caller and drop the old one.
        Libc.free(unsafe_load(buf))
        unsafe_store!(buf, outbuf)
        unsafe_store!(buf_size, outbuflen)

    catch err
        # "In the case of failure, the return value is 0 (zero) and all pointer arguments are left unchanged."
        outdatalen = Csize_t(0)
        if outbuf != C_NULL
            Libc.free(outbuf)
        end
        @error "H5Zbzip2.jl Non-Fatal ERROR: " err
        display(stacktrace(catch_backtrace()))
    end # try - catch

    return Csize_t(outdatalen)
end # function H5Z_filter_bzip2
|
||
"""
    register_bzip2()

Register the bzip2 filter with the HDF5 library via `API.h5z_register` and
record `Bzip2Filter` in `FILTERS` under `H5Z_FILTER_BZIP2`. Returns `nothing`.
"""
function register_bzip2()
    filter_ptr = @cfunction(
        H5Z_filter_bzip2,
        Csize_t,
        (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}})
    )
    # The two C_NULL slots precede the filter callback; presumably they are the
    # optional can_apply / set_local callbacks, which this filter does not use.
    filter_class = API.H5Z_class_t(
        API.H5Z_CLASS_T_VERS, H5Z_FILTER_BZIP2, 1, 1,
        pointer(bzip2_name), C_NULL, C_NULL, filter_ptr
    )
    API.h5z_register(filter_class)
    FILTERS[H5Z_FILTER_BZIP2] = Bzip2Filter
    return nothing
end
|
||
|
||
# Need stdcall for 32-bit Windows? | ||
# Thin wrapper over libbzip2's one-shot `BZ2_bzBuffToBuffCompress`.
# `destLen` is passed as a pointer to a Cuint; the C status code is returned as Cint.
function BZ2_bzBuffToBuffCompress(dest, destLen, source, sourceLen, blockSize100k, verbosity, workFactor)
    status = ccall(
        (:BZ2_bzBuffToBuffCompress, libbzip2), Cint,
        (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint, Cint),
        dest, destLen, source, sourceLen, blockSize100k, verbosity, workFactor
    )
    return status
end
|
||
# Thin wrapper over libbzip2's one-shot `BZ2_bzBuffToBuffDecompress`.
# `destLen` is passed as a pointer to a Cuint; the C status code is returned as Cint.
function BZ2_bzBuffToBuffDecompress(dest, destLen, source, sourceLen, small, verbosity)
    status = ccall(
        (:BZ2_bzBuffToBuffDecompress, libbzip2), Cint,
        (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint),
        dest, destLen, source, sourceLen, small, verbosity
    )
    return status
end
|
||
# Filters Module | ||
|
||
"""
    Bzip2Filter(blockSize100k)
    Bzip2Filter()

Filter settings for the bzip2 HDF5 filter. `blockSize100k` is stored as a
`Cuint`; the zero-argument constructor uses `9`.
"""
struct Bzip2Filter <: Filter
    blockSize100k::Cuint
end

function Bzip2Filter()
    return Bzip2Filter(Cuint(9))
end
|
||
# Filter interface for Bzip2Filter: identity, name, callbacks, and registration.
function filterid(::Type{Bzip2Filter})
    return H5Z_FILTER_BZIP2
end

function filtername(::Type{Bzip2Filter})
    return bzip2_name
end

function filter_func(::Type{Bzip2Filter})
    return H5Z_filter_bzip2
end

function filter_cfunc(::Type{Bzip2Filter})
    return @cfunction(
        H5Z_filter_bzip2,
        Csize_t,
        (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}})
    )
end

# Registration is the same whether requested with the type or an instance.
function register_filter(::Type{Bzip2Filter})
    return register_bzip2()
end

function register_filter(::Bzip2Filter)
    return register_bzip2()
end
|
||
# Precompile both the instance and the type methods of `register_filter`.
for argtypes in ((Bzip2Filter,), (Type{Bzip2Filter},))
    precompile(register_filter, argtypes)
end
|
||
end # module H5Zbzip2 |
Uh oh!
There was an error while loading. Please reload this page.