"""
This example shows how to create and extend an unlimited dataset
with gzip compression.  The program first writes integers to a gzip
compressed dataset with dataspace dimensions of DIM0xDIM1, then
closes the file.  Next, it reopens the file, reads back the data,
outputs it to the screen, extends the dataset, and writes new data
to the extended portions of the dataset.  Finally it reopens the
file again, reads back the data, and outputs it to the screen.
"""
import sys

import numpy as np
import h5py

FILE = "h5ex_d_unlimgzip.h5"
DATASET = "DS1"

# Strings are handled very differently between python2 and python3.
if sys.hexversion >= 0x03000000:
    FILE = FILE.encode()
    DATASET = DATASET.encode()

DIM0 = 4
DIM1 = 7
EDIM0 = 6
EDIM1 = 10
CHUNK0 = 4
CHUNK1 = 4

def run():

    # Check if gzip compression is available and can be used for
    # both compression and decompression.  Normally we do not perform
    # error checking in these examples for the sake of clarity, but
    # in this case we will make an exception because this filter is
    # an optional part of the hdf5 library.
    if not h5py.h5z.filter_avail(h5py.h5z.FILTER_DEFLATE):
        raise RuntimeError("Gzip filter not available.")

    filter_info = h5py.h5z.get_filter_info(h5py.h5z.FILTER_DEFLATE)
    if ((filter_info & h5py.h5z.FILTER_CONFIG_ENCODE_ENABLED) & 
        (filter_info & h5py.h5z.FILTER_CONFIG_DECODE_ENABLED)):
        msg = "Gzip filter not available for encoding and decoding."
        raise RuntimeError(msg)

    # Initialize the data.
    wdata = np.zeros((DIM0, DIM1), dtype=np.int32)
    for i in range(DIM0):
        for j in range(DIM1):
            wdata[i][j] = i * j - j

    # Create a new file using the default properties.
    file = h5py.h5f.create(FILE)

    # Create the dataspace.  
    dims = (DIM0, DIM1)
    maxdims = (h5py.h5s.UNLIMITED, h5py.h5s.UNLIMITED)
    space = h5py.h5s.create_simple(dims, maxdims)

    # Create the dataset creation property list and set the chunk size, add
    # the compression filter.
    dcpl = h5py.h5p.create(h5py.h5p.DATASET_CREATE)
    chunk = (CHUNK0, CHUNK1)
    dcpl.set_chunk(chunk)
    dcpl.set_deflate(9)

    # Create the chunked dataset.
    dset = h5py.h5d.create(file, DATASET, h5py.h5t.STD_I32LE, space, dcpl)

    # Write the data to the dataset.
    dset.write(h5py.h5s.ALL, h5py.h5s.ALL, wdata)

    # Close and release resources.
    del dcpl
    del dset
    del space
    del file

    # Now we begin the read section of this example.
    # Open the file and dataset.
    file = h5py.h5f.open(FILE, h5py.h5f.ACC_RDWR)
    dset = h5py.h5d.open(file, DATASET)

    # Get the dataspace and allocate an array for reading.  Numpy makes this
    # MUCH easier than C.
    space = dset.get_space()
    dims = space.get_simple_extent_dims()
    rdata = np.zeros(dims, dtype=np.int32)

    # Read the data using the default properties.
    dset.read(h5py.h5s.ALL, h5py.h5s.ALL, rdata)
    print("\nDataset before extension:")
    print(rdata)

    # Extend the dataset.
    extdims = (EDIM0, EDIM1)
    dset.set_extent(extdims)

    # Retrieve the dataspace for the newly extended dataset.
    space = dset.get_space()

    # Initialize data for writing to the extended dataset.
    wdata = np.zeros((EDIM0, EDIM1), dtype=np.int32)
    for i in range(EDIM0):
        for j in range(EDIM1):
            wdata[i][j] = j

    # Select the entire dataspace, then subtract a hyperslab reflecting the
    # original dimensions from the selection.  The selection now contains
    # only the newly extended portions of the dataset.
    space.select_all()
    start = (0, 0)
    count = dims
    space.select_hyperslab(start, count, None, None, h5py.h5s.SELECT_NOTB)

    # Write to the extended dataset.
    dset.write(h5py.h5s.ALL, space, wdata)

    # Close and release resources.
    del dset
    del space
    del file

    # Now simply read back the data and echo to the screen.
    file = h5py.h5f.open(FILE)
    dset = h5py.h5d.open(file, DATASET)

    # Retrieve dataset creation property list.
    dcpl = dset.get_create_plist()

    # Retrieve and print the filter type.  We only retrieve the first
    # filter because we know we only added one filter.
    filter_type, flags, vals, name = dcpl.get_filter(0)

    # No NBIT or SCALEOFFSET filter, but there is something new, LZF.
    ddict = {h5py.h5z.FILTER_DEFLATE: "DEFLATE",
             h5py.h5z.FILTER_SHUFFLE: "SHUFFLE",
             h5py.h5z.FILTER_FLETCHER32: "FLETCHER32",
             h5py.h5z.FILTER_SZIP: "SZIP",
             h5py.h5z.FILTER_LZF: "LZF"}
    print("\nFilter type for %s is H5Z_%s" % (DATASET, ddict[filter_type])) 

    # Get the dataspace and allocate an array for reading.
    space = dset.get_space()
    dims = space.get_simple_extent_dims()
    rdata = np.zeros(dims, dtype=np.int32)

    # Read the data using the default properties.
    dset.read(h5py.h5s.ALL, h5py.h5s.ALL, rdata)
    print("\nDataset after extension:")
    print(rdata)

    # Close and release resources.
    del dset
    del space
    del file


if __name__ == "__main__":
    run()