Skip to content
Closed
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,9 +1,12 @@
name = "HDF5"
uuid = "f67ccb44-e63f-5c2f-98bd-6dc0ccc4ba2f"
version = "0.15.5"
version = "0.16.0"

[deps]
Blosc = "a74b3585-a348-5f62-a45c-50e91977d574"
CodecBzip2 = "523fee87-0ab8-5b00-afb7-3ecf72e48cfd"
CodecLz4 = "5ba52731-8f18-5e0d-9241-30f10d1ec561"
CodecZstd = "6b39b394-51ab-5f42-8807-6242bab2b4c2"
Compat = "34da2185-b29b-5c13-b0c7-acf172513d20"
HDF5_jll = "0234f1f7-429e-5d53-9886-15a909be8d59"
Libdl = "8f399da3-3557-5675-b5ff-fb832c97cbdb"
Expand All @@ -12,6 +15,9 @@ Random = "9a3f8284-a2c9-5f02-9a11-845980a1fd5c"
Requires = "ae029012-a4dd-5104-9daa-d747884805df"

[compat]
CodecBzip2 = "0.7"
CodecLz4 = "0.4"
CodecZstd = "0.7"
Blosc = "0.7.1"
Compat = "3.1.0"
HDF5_jll = "~1.10.5, ~1.12.0"
Expand Down
2 changes: 1 addition & 1 deletion src/HDF5.jl
Original file line number Diff line number Diff line change
Expand Up @@ -1593,7 +1593,7 @@ function __init__()
ENV["HDF5_USE_FILE_LOCKING"] = "FALSE"
end

Filters.register_blosc()
Filters.register_filters()

# use our own error handling machinery (i.e. turn off automatic error printing)
API.h5e_set_auto(API.H5E_DEFAULT, C_NULL, C_NULL)
Expand Down
36 changes: 28 additions & 8 deletions src/filters/blosc.jl → src/filters/H5Zblosc.jl
Original file line number Diff line number Diff line change
@@ -1,8 +1,15 @@
module H5Zblosc
# port of https://github.com/Blosc/c-blosc/blob/3a668dcc9f61ad22b5c0a0ab45fe8dad387277fd/hdf5/blosc_filter.c (copyright 2010 Francesc Alted, license: MIT/expat)

using ..API
import Blosc
import ..Filters: FILTERS, Filter, filterid, register_filter, FilterPipeline
import ..Filters: filterid, filtername, encoder_present, decoder_present
import ..Filters: set_local_func, set_local_cfunc, can_apply_func, can_apply_cfunc, filter_func, filter_cfunc, register_filter

const FILTER_BLOSC = API.H5Z_filter_t(32001) # Filter ID registered with the HDF Group for Blosc
export H5Z_FILTER_BLOSC, blosc_filter, BloscFilter

const H5Z_FILTER_BLOSC = API.H5Z_filter_t(32001) # Filter ID registered with the HDF Group for Blosc
const FILTER_BLOSC_VERSION = 2
const blosc_name = "blosc"

Expand All @@ -12,7 +19,7 @@ function blosc_set_local(dcpl::API.hid_t, htype::API.hid_t, space::API.hid_t)
blosc_nelements = Ref{Csize_t}(length(blosc_values))
blosc_chunkdims = Vector{API.hsize_t}(undef,32)

API.h5p_get_filter_by_id(dcpl, FILTER_BLOSC, blosc_flags, blosc_nelements, blosc_values, 0, C_NULL, C_NULL)
API.h5p_get_filter_by_id(dcpl, H5Z_FILTER_BLOSC, blosc_flags, blosc_nelements, blosc_values, 0, C_NULL, C_NULL)
flags = blosc_flags[]

nelements = max(blosc_nelements[], 4) # First 4 slots reserved
Expand Down Expand Up @@ -45,7 +52,7 @@ function blosc_set_local(dcpl::API.hid_t, htype::API.hid_t, space::API.hid_t)
blosc_values[3] = basetypesize
blosc_values[4] = chunksize * htypesize # size of the chunk

API.h5p_modify_filter(dcpl, FILTER_BLOSC, flags, nelements, blosc_values)
API.h5p_modify_filter(dcpl, H5Z_FILTER_BLOSC, flags, nelements, blosc_values)

return API.herr_t(1)
end
Expand Down Expand Up @@ -109,8 +116,8 @@ function register_blosc()
c_blosc_filter = @cfunction(blosc_filter, Csize_t,
(Cuint, Csize_t, Ptr{Cuint}, Csize_t,
Ptr{Csize_t}, Ptr{Ptr{Cvoid}}))
API.h5z_register(API.H5Z_class_t(API.H5Z_CLASS_T_VERS, FILTER_BLOSC, 1, 1, pointer(blosc_name), C_NULL, c_blosc_set_local, c_blosc_filter))

API.h5z_register(API.H5Z_class_t(API.H5Z_CLASS_T_VERS, H5Z_FILTER_BLOSC, 1, 1, pointer(blosc_name), C_NULL, c_blosc_set_local, c_blosc_filter))
FILTERS[H5Z_FILTER_BLOSC] = BloscFilter
return nothing
end

Expand Down Expand Up @@ -142,6 +149,17 @@ function BloscFilter(;level=5, shuffle=true, compressor="blosclz")
BloscFilter(0,0,0,0,level,shuffle,compcode)
end

# Filter interface methods for BloscFilter: numeric filter id, filter name, and
# the Julia functions / C-callable pointers used as the HDF5 filter callbacks.
filterid(::Type{BloscFilter}) = H5Z_FILTER_BLOSC
filtername(::Type{BloscFilter}) = blosc_name
set_local_func(::Type{BloscFilter}) = blosc_set_local
# NOTE: the specialized *_cfunc methods are intentional. Every argument passed to
# @cfunction below is a constant, so @cfunction returns a plain pointer rather
# than a closure CFunction. Closure cfunctions depend on LLVM trampolines, which
# are not available on all platforms.
set_local_cfunc(::Type{BloscFilter}) = @cfunction(blosc_set_local, API.herr_t, (API.hid_t,API.hid_t,API.hid_t))
filter_func(::Type{BloscFilter}) = blosc_filter
filter_cfunc(::Type{BloscFilter}) = @cfunction(blosc_filter, Csize_t,
(Cuint, Csize_t, Ptr{Cuint}, Csize_t,
Ptr{Csize_t}, Ptr{Ptr{Cvoid}}))
Comment on lines +143 to +145
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this still needed? Won't the generic one work?

Copy link
Member Author

@mkitti mkitti Dec 7, 2021

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, this is useful since all the arguments here are constants and @cfunction will return a pointer rather than a closure CFunction. The closure depends on LLVM trampolines, which are not available on all platforms.

https://docs.julialang.org/en/v1/manual/calling-c-and-fortran-code/#Closure-cfunctions

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah right. maybe add a comment to that effect?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added a comment to that effect in c450eba (still needs to be pushed to this branch)

# Registering either the BloscFilter type or an instance of it delegates to
# register_blosc().
register_filter(::Type{BloscFilter}) = register_blosc()
register_filter(::BloscFilter) = register_blosc()

function Base.show(io::IO, blosc::BloscFilter)
print(io, BloscFilter,
"(level=", Int(blosc.level),
Expand All @@ -150,9 +168,6 @@ function Base.show(io::IO, blosc::BloscFilter)
")")
end

filterid(::Type{BloscFilter}) = FILTER_BLOSC
FILTERS[FILTER_BLOSC] = BloscFilter

function Base.push!(f::FilterPipeline, blosc::BloscFilter)
0 <= blosc.level <= 9 || throw(ArgumentError("blosc compression $(blosc.level) not in [0,9]"))
ref = Ref(blosc)
Expand All @@ -161,3 +176,8 @@ function Base.push!(f::FilterPipeline, blosc::BloscFilter)
end
return f
end

precompile(register_filter, (BloscFilter,))
precompile(register_filter, (Type{BloscFilter},))

end # module H5Zblosc
208 changes: 208 additions & 0 deletions src/filters/H5Zbzip2.jl
Original file line number Diff line number Diff line change
@@ -0,0 +1,208 @@
# The code below has been ported to Julia from the original C:
# https://github.com/nexusformat/HDF5-External-Filter-Plugins/blob/master/BZIP2/src/H5Zbzip2.c
# The filter function H5Z_filter_bzip2 was adopted from
# PyTables http://www.pytables.org.
# The plugin can be used with the HDF5 library version 1.8.11+ to read
# HDF5 datasets compressed with bzip2 created by PyTables.
# See H5Zbzip2_LICENSE.txt for the license.

# The following copyright and license applies to the Julia port itself.
# Copyright © 2021 Mark Kittisopikul, Howard Hughes Medical Institute
# Licensed under MIT License, see LICENSE
module H5Zbzip2

using ..API
using CodecBzip2
import CodecBzip2: libbzip2
import ..Filters: FILTERS, Filter, filterid, register_filter, FilterPipeline
import ..Filters: filterid, filtername, encoder_present, decoder_present
import ..Filters: set_local_func, set_local_cfunc, can_apply_func, can_apply_cfunc, filter_func, filter_cfunc

export H5Z_FILTER_BZIP2, H5Z_filter_bzip2, Bzip2Filter


const H5Z_FILTER_BZIP2 = API.H5Z_filter_t(307)
const bzip2_name = "HDF5 bzip2 filter; see http://www.hdfgroup.org/services/contributions.html"

"""
    H5Z_filter_bzip2(flags, cd_nelmts, cd_values, nbytes, buf_size, buf) -> Csize_t

HDF5 filter callback that compresses or decompresses one buffer with bzip2.

# Arguments
- `flags`: filter flags; when `API.H5Z_FLAG_REVERSE` is set the buffer is
  decompressed, otherwise it is compressed.
- `cd_nelmts`, `cd_values`: client data. When `cd_nelmts > 0`, the first element
  of `cd_values` is used as the bzip2 block size (`blockSize100k`, must be 1-9).
  Only consulted when compressing.
- `nbytes`: number of valid input bytes in `buf[]`.
- `buf_size`: on success, overwritten with the allocated size of the new buffer.
- `buf`: on success, the old buffer is freed with `Libc.free` and replaced by a
  newly `Libc.malloc`ed output buffer.

# Returns
The number of valid bytes in the output buffer, or `0` on failure. In the case
of failure all pointer arguments are left unchanged and the error is logged;
no exception escapes this function.
"""
function H5Z_filter_bzip2(flags::Cuint, cd_nelmts::Csize_t,
                          cd_values::Ptr{Cuint}, nbytes::Csize_t,
                          buf_size::Ptr{Csize_t}, buf::Ptr{Ptr{Cvoid}})::Csize_t
    try
        inbuf = unsafe_load(buf)
        outbuf, outbuflen, outdatalen = if flags & API.H5Z_FLAG_REVERSE != 0
            _bzip2_decompress(inbuf, nbytes)
        else
            _bzip2_compress(inbuf, nbytes, cd_nelmts, cd_values)
        end

        # Only touch the caller's pointers once the whole operation succeeded.
        Libc.free(inbuf)
        unsafe_store!(buf, outbuf)
        unsafe_store!(buf_size, outbuflen)
        return Csize_t(outdatalen)
    catch err
        # "In the case of failure, the return value is 0 (zero) and all pointer
        # arguments are left unchanged." The helpers have already released any
        # buffer or stream they allocated.
        # Log through the logging system instead of `display`, which writes to
        # stdout from inside a C callback.
        @error "H5Zbzip2.jl Non-Fatal ERROR: " exception = (err, catch_backtrace())
        return Csize_t(0)
    end
end # function H5Z_filter_bzip2

# Decompress `nbytes` bytes at `inbuf` into a freshly malloc'ed buffer.
# Returns `(outbuf, outbuflen, outdatalen)`. On error the output buffer and the
# bzip2 stream state are released before the exception is rethrown.
function _bzip2_decompress(inbuf::Ptr{Cvoid}, nbytes::Csize_t)
    # Initial guess for the output size; grown on demand below.
    outbuflen = nbytes * 3 + 1
    outbuf = Libc.malloc(outbuflen)
    if outbuf == C_NULL
        error("H5Zbzip2: memory allocation failed for bzip2 decompression.")
    end

    stream = CodecBzip2.BZStream()
    # Just use default malloc and free
    stream.bzalloc = C_NULL
    stream.bzfree = C_NULL
    stream_open = false  # true while the stream holds state that must be ended

    try
        # BZ2_bzDecompressInit
        ret = CodecBzip2.decompress_init!(stream, 0, false)
        if ret != CodecBzip2.BZ_OK
            error("H5Zbzip2: bzip2 decompress start failed with error $ret.")
        end
        stream_open = true

        stream.next_out = outbuf
        stream.avail_out = outbuflen
        stream.next_in = inbuf
        stream.avail_in = nbytes

        while true
            # BZ2_bzDecompress
            ret = CodecBzip2.decompress!(stream)
            if ret < 0
                error("H5Zbzip2: bzip2 decompression failed with error $ret.")
            end
            ret == CodecBzip2.BZ_STREAM_END && break

            if stream.avail_out == 0
                # Grow the output buffer: double it and continue writing into
                # the newly added second half.
                newbuflen = outbuflen * 2
                newbuf = Libc.realloc(outbuf, newbuflen)
                if newbuf == C_NULL
                    error("H5Zbzip2: memory allocation failed for bzip2 decompression.")
                end
                stream.next_out = newbuf + outbuflen
                stream.avail_out = outbuflen
                outbuf = newbuf
                outbuflen = newbuflen
            elseif stream.avail_in == 0
                # Output space is available but the decompressor is starved of
                # input and has not seen end-of-stream: the chunk is truncated.
                # Without this check the loop would never terminate.
                error("H5Zbzip2: bzip2 decompression failed: unexpected end of input.")
            end
        end

        outdatalen = Csize_t(stream.total_out_lo32)

        # BZ2_bzDecompressEnd; clear the flag first so a failure here does not
        # trigger a second end call in the catch block.
        stream_open = false
        ret = CodecBzip2.decompress_end!(stream)
        if ret != CodecBzip2.BZ_OK
            error("H5Zbzip2: bzip2 decompression end failed with error $ret.")
        end

        return outbuf, outbuflen, outdatalen
    catch
        if stream_open
            CodecBzip2.decompress_end!(stream)
        end
        Libc.free(outbuf)
        rethrow()
    end
end

# Compress `nbytes` bytes at `inbuf` into a freshly malloc'ed buffer.
# Returns `(outbuf, outbuflen, outdatalen)`. On error the output buffer is
# released before the exception is rethrown.
function _bzip2_compress(inbuf::Ptr{Cvoid}, nbytes::Csize_t,
                         cd_nelmts::Csize_t, cd_values::Ptr{Cuint})
    blockSize100k = Cuint(9)

    # Get compression blocksize if present
    if cd_nelmts > 0
        blockSize100k = unsafe_load(cd_values)
        if blockSize100k < 1 || blockSize100k > 9
            error("H5Zbzip2: Invalid compression blocksize: $blockSize100k")
        end
    end

    # Prepare the output buffer
    outbuflen = nbytes + nbytes ÷ 100 + 600 # worst case (bzip2 docs)
    outbuf = Libc.malloc(outbuflen)
    @debug "Allocated" outbuflen outbuf
    if outbuf == C_NULL
        error("H5Zbzip2: Memory allocation failed for bzip2 compression")
    end

    try
        # In: capacity of outbuf. Out: number of compressed bytes written.
        r_odatalen = Ref{Cuint}(outbuflen)
        ret = BZ2_bzBuffToBuffCompress(outbuf, r_odatalen, inbuf, nbytes,
                                       blockSize100k, 0, 0)
        if ret != CodecBzip2.BZ_OK
            error("H5Zbzip2: bzip2 compression failed with error $ret.")
        end
        return outbuf, outbuflen, Csize_t(r_odatalen[])
    catch
        Libc.free(outbuf)
        rethrow()
    end
end

"""
    register_bzip2()

Register the bzip2 filter with the HDF5 library via `API.h5z_register` and
record `Bzip2Filter` in `FILTERS` under `H5Z_FILTER_BZIP2`. Returns `nothing`.
"""
function register_bzip2()
    # C-callable pointer to the Julia filter implementation.
    filter_ptr = @cfunction(
        H5Z_filter_bzip2,
        Csize_t,
        (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}})
    )

    # NOTE(review): the two `1`s are presumably the encoder/decoder-present
    # flags and the two `C_NULL`s the can_apply/set_local callbacks — confirm
    # against the H5Z_class_t definition.
    filter_class = API.H5Z_class_t(
        API.H5Z_CLASS_T_VERS, H5Z_FILTER_BZIP2, 1, 1,
        pointer(bzip2_name), C_NULL, C_NULL, filter_ptr,
    )
    API.h5z_register(filter_class)

    FILTERS[H5Z_FILTER_BZIP2] = Bzip2Filter
    return nothing
end


# Need stdcall for 32-bit Windows?
"""
    BZ2_bzBuffToBuffCompress(dest, destLen, source, sourceLen, blockSize100k, verbosity, workFactor)

Thin `ccall` wrapper around `BZ2_bzBuffToBuffCompress` from `libbzip2`.
Returns the `Cint` status code produced by the library call.
"""
function BZ2_bzBuffToBuffCompress(
    dest, destLen, source, sourceLen, blockSize100k, verbosity, workFactor
)
    return ccall(
        (:BZ2_bzBuffToBuffCompress, libbzip2),
        Cint,
        (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint, Cint),
        dest, destLen, source, sourceLen, blockSize100k, verbosity, workFactor,
    )
end

"""
    BZ2_bzBuffToBuffDecompress(dest, destLen, source, sourceLen, small, verbosity)

Thin `ccall` wrapper around `BZ2_bzBuffToBuffDecompress` from `libbzip2`.
Returns the `Cint` status code produced by the library call.
"""
function BZ2_bzBuffToBuffDecompress(
    dest, destLen, source, sourceLen, small, verbosity
)
    return ccall(
        (:BZ2_bzBuffToBuffDecompress, libbzip2),
        Cint,
        (Ptr{Cchar}, Ptr{Cuint}, Ptr{Cchar}, Cuint, Cint, Cint),
        dest, destLen, source, sourceLen, small, verbosity,
    )
end

# Filters Module

"""
    Bzip2Filter(blockSize100k)
    Bzip2Filter()

Filter type for the bzip2 HDF5 filter. It holds a single `blockSize100k`
setting, stored as a `Cuint`; the zero-argument constructor uses `9`.
"""
struct Bzip2Filter <: Filter
    blockSize100k::Cuint
end

function Bzip2Filter()
    return Bzip2Filter(9)
end

# Filter interface methods for Bzip2Filter.

# Numeric filter id used for the bzip2 filter.
function filterid(::Type{Bzip2Filter})
    return H5Z_FILTER_BZIP2
end

# Name string used when registering the filter.
function filtername(::Type{Bzip2Filter})
    return bzip2_name
end

# Julia function implementing the filter.
function filter_func(::Type{Bzip2Filter})
    return H5Z_filter_bzip2
end

# C-callable pointer to the filter function, with the same callback signature
# that register_bzip2() uses.
function filter_cfunc(::Type{Bzip2Filter})
    return @cfunction(
        H5Z_filter_bzip2,
        Csize_t,
        (Cuint, Csize_t, Ptr{Cuint}, Csize_t, Ptr{Csize_t}, Ptr{Ptr{Cvoid}})
    )
end

# Registering either the type or an instance delegates to register_bzip2().
function register_filter(::Type{Bzip2Filter})
    return register_bzip2()
end

function register_filter(::Bzip2Filter)
    return register_bzip2()
end

precompile(register_filter, (Bzip2Filter,))
precompile(register_filter, (Type{Bzip2Filter},))

end # module H5Zbzip2
Loading