001/*****************************************************************************
002 * Copyright by The HDF Group.                                               *
003 * Copyright by the Board of Trustees of the University of Illinois.         *
004 * All rights reserved.                                                      *
005 *                                                                           *
006 * This file is part of the HDF Java Products distribution.                  *
007 * The full copyright notice, including terms governing use, modification,   *
008 * and redistribution, is contained in the files COPYING and Copyright.html. *
009 * COPYING can be found at the root of the source code distribution tree.    *
010 * Or, see https://support.hdfgroup.org/products/licenses.html               *
011 * If you do not have access to either file, you may request a copy from     *
012 * help@hdfgroup.org.                                                        *
013 ****************************************************************************/
014
015package hdf.object;
016
017import java.lang.reflect.Array;
018import java.util.List;
019
020/**
021 * The abstract class provides general APIs to create and manipulate dataset
022 * objects, and retrieve dataset properties, datatype and dimension sizes.
023 * <p>
024 * This class provides two convenient functions, read()/write(), to read/write
025 * data values. Reading/writing data may take many library calls if we use the
026 * library APIs directly. The read() and write functions hide all the details of
027 * these calls from users.
028 * <p>
029 * For more details on dataset,
030 * see <b> <a href="https://support.hdfgroup.org/HDF5/doc/UG/HDF5_Users_Guide-Responsive%20HTML5/index.html">HDF5 User's Guide</a> </b>
031 * <p>
032 *
033 * @see hdf.object.ScalarDS
034 * @see hdf.object.CompoundDS
035 *
036 * @version 1.1 9/4/2007
037 * @author Peter X. Cao
038 */
039public abstract class Dataset extends HObject implements MetaDataContainer, DataFormat {
040    private static final long serialVersionUID    = -3360885430038261178L;
041
042    private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(Dataset.class);
043
044    /**
045     * The memory buffer that holds the raw data array of the dataset.
046     */
047    protected transient Object          data;
048
049    /**
050     * The number of dimensions of the dataset.
051     */
052    protected int             rank;
053
054    /**
055     * The current dimension sizes of the dataset
056     */
057    protected long[]          dims;
058
059    /**
060     * The max dimension sizes of the dataset
061     */
062    protected long[]          maxDims;
063
064    /**
065     * Array that contains the number of data points selected (for read/write)
066     * in each dimension.
067     * <p>
068     * The selected size must be less than or equal to the current dimension size.
069     * A subset of a rectangle selection is defined by the starting position and
070     * selected sizes.
071     * <p>
072     * For example, if a 4 X 5 dataset is as follows:
073     *
074     * <pre>
075     *     0,  1,  2,  3,  4
076     *    10, 11, 12, 13, 14
077     *    20, 21, 22, 23, 24
078     *    30, 31, 32, 33, 34
079     * long[] dims = {4, 5};
080     * long[] startDims = {1, 2};
081     * long[] selectedDims = {3, 3};
082     * then the following subset is selected by the startDims and selectedDims above:
083     *     12, 13, 14
084     *     22, 23, 24
085     *     32, 33, 34
086     * </pre>
087     */
088    protected long[]          selectedDims;
089
090    /**
091     * The starting position of each dimension of a selected subset. With both
092     * the starting position and selected sizes, the subset of a rectangle
093     * selection is fully defined.
094     */
095    protected long[]          startDims;
096
097    /**
098     * Array that contains the indices of the dimensions selected for display.
099     * <p>
100     * <B>selectedIndex[] is provided for two purposes:</B>
101     * <OL>
102     * <LI>
103     * selectedIndex[] is used to indicate the order of dimensions for display,
104     * i.e. selectedIndex[0] = row, selectedIndex[1] = column and
105     * selectedIndex[2] = depth. For example, for a four dimension dataset, if
106     * selectedIndex[] is {1, 2, 3}, then dim[1] is selected as row index,
107     * dim[2] is selected as column index and dim[3] is selected as depth index.
108     * <LI>
109     * selectedIndex[] is also used to select dimensions for display for
110     * datasets with three or more dimensions. We assume that applications such
111     * as HDFView can only display data up to three dimensions (a 2D
112     * spreadsheet/image with a third dimension that the 2D spreadsheet/image is
113     * cut from). For datasets with more than three dimensions, we need
114     * selectedIndex[] to store which three dimensions are chosen for display.
115     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
116     * then dim[1] is selected as row index, dim[2] is selected as column index
117     * and dim[3] is selected as depth index. dim[0] is not selected. Its
118     * location is fixed at 0 by default.
119     * </OL>
120     */
121    protected final int[]     selectedIndex;
122
123    /**
124     * The number of elements to move from the start location in each dimension.
125     * For example, if selectedStride[0] = 2, every other data point is selected
126     * along dim[0].
127     */
128    protected long[]          selectedStride;
129
130    /**
131     * The array of dimension sizes for a chunk.
132     */
133    protected long[]          chunkSize;
134
    /** The compression information. */
    protected StringBuilder   compression;

    /**
     * Text prefix used for a GZIP compression description. Presumably the
     * compression level is appended to it by implementing classes (to be
     * confirmed against the callers).
     */
    public static final String COMPRESSION_GZIP_TXT = "GZIP: level = ";
138
139    /** The filters information. */
140    protected StringBuilder   filters;
141
142    /** The storage layout information. */
143    protected StringBuilder   storageLayout;
144
145    /** The storage information. */
146    protected StringBuilder   storage;
147
148    /** The datatype object of the dataset. */
149    protected Datatype        datatype;
150
151    /**
152     * Array of strings that represent the dimension names. It is null if dimension names do not exist.
153     */
154    protected String[]        dimNames;
155
156    /** Flag to indicate if the byte[] array is converted to strings */
157    protected boolean         convertByteToString = true;
158
159    /** Flag to indicate if data values are loaded into memory. */
160    protected boolean         isDataLoaded        = false;
161
162    /** Flag to indicate if this dataset has been initialized */
163    protected boolean         inited = false;
164
165    /** The number of data points in the memory buffer. */
166    protected long            nPoints             = 1;
167
168    /**
169     * The data buffer that contains the raw data directly reading from file
170     * (before any data conversion).
171     */
172    protected transient Object originalBuf = null;
173
174    /**
175     * The array that holds the converted data of unsigned C-type integers.
176     * <p>
177     * For example, Suppose that the original data is an array of unsigned
178     * 16-bit short integers. Since Java does not support unsigned integer, the
     * data is converted to an array of 32-bit signed integer. In that case, the
     * converted buffer is the array of 32-bit signed integer.
181     */
182    protected transient Object convertedBuf = null;
183
184    /**
185     * Constructs a Dataset object with a given file, name and path.
186     *
187     * @param theFile
188     *            the file that contains the dataset.
189     * @param dsName
190     *            the name of the Dataset, e.g. "dset1".
191     * @param dsPath
192     *            the full group path of this Dataset, e.g. "/arrays/".
193     */
194    public Dataset(FileFormat theFile, String dsName, String dsPath) {
195        this(theFile, dsName, dsPath, null);
196    }
197
198    /**
     * @deprecated Not for public use in the future. <br>
     *             Use {@link #Dataset(FileFormat, String, String)} instead.
201     *
202     * @param theFile
203     *            the file that contains the dataset.
204     * @param dsName
205     *            the name of the Dataset, e.g. "dset1".
206     * @param dsPath
207     *            the full group path of this Dataset, e.g. "/arrays/".
208     * @param oid
209     *            the oid of this Dataset.
210     */
211    @Deprecated
212    public Dataset(FileFormat theFile, String dsName, String dsPath, long[] oid) {
213        super(theFile, dsName, dsPath, oid);
214
215        datatype = null;
216        rank = -1;
217        data = null;
218        dims = null;
219        maxDims = null;
220        selectedDims = null;
221        startDims = null;
222        selectedStride = null;
223        chunkSize = null;
224        compression = new StringBuilder("NONE");
225        filters = new StringBuilder("NONE");
226        storageLayout = new StringBuilder("NONE");
227        storage = new StringBuilder("NONE");
228        dimNames = null;
229
230        selectedIndex = new int[3];
231        selectedIndex[0] = 0;
232        selectedIndex[1] = 1;
233        selectedIndex[2] = 2;
234    }
235
236    /**
237     * Clears memory held by the dataset, such as the data buffer.
238     */
239    @SuppressWarnings("rawtypes")
240    public void clear() {
241        if (data != null) {
242            if (data instanceof List) {
243                ((List) data).clear();
244            }
245            data = null;
246            originalBuf = null;
247            convertedBuf = null;
248        }
249        isDataLoaded = false;
250    }
251
252    /**
253     * Returns the rank (number of dimensions) of the dataset.
254     *
255     * @return the number of dimensions of the dataset.
256     */
257    @Override
258    public final int getRank() {
259        if (!inited)
260            init();
261
262        return rank;
263    }
264
265    /**
266     * Returns the array that contains the dimension sizes of the dataset.
267     *
268     * @return the dimension sizes of the dataset.
269     */
270    @Override
271    public final long[] getDims() {
272        if (!inited)
273            init();
274
275        return dims;
276    }
277
278    /**
279     * Returns the array that contains the max dimension sizes of the dataset.
280     *
281     * @return the max dimension sizes of the dataset.
282     */
283    public final long[] getMaxDims() {
284        if (!inited) init();
285
286        if (maxDims == null) return dims;
287
288        return maxDims;
289    }
290
291    /**
292     * Returns the dimension sizes of the selected subset.
293     * <p>
294     * The SelectedDims is the number of data points of the selected subset.
295     * Applications can use this array to change the size of selected subset.
296     *
297     * The selected size must be less than or equal to the current dimension size.
298     * Combined with the starting position, selected sizes and stride, the
299     * subset of a rectangle selection is fully defined.
300     * <p>
301     * For example, if a 4 X 5 dataset is as follows:
302     *
303     * <pre>
304     *     0,  1,  2,  3,  4
305     *    10, 11, 12, 13, 14
306     *    20, 21, 22, 23, 24
307     *    30, 31, 32, 33, 34
308     * long[] dims = {4, 5};
309     * long[] startDims = {1, 2};
310     * long[] selectedDims = {3, 3};
311     * long[] selectedStride = {1, 1};
312     * then the following subset is selected by the startDims and selectedDims
313     *     12, 13, 14
314     *     22, 23, 24
315     *     32, 33, 34
316     * </pre>
317     *
318     * @return the dimension sizes of the selected subset.
319     */
320    @Override
321    public final long[] getSelectedDims() {
322        if (!inited) init();
323
324        return selectedDims;
325    }
326
327    /**
328     * Returns the starting position of a selected subset.
329     * <p>
330     * Applications can use this array to change the starting position of a
331     * selection. Combined with the selected dimensions, selected sizes and
332     * stride, the subset of a rectangle selection is fully defined.
333     * <p>
334     * For example, if a 4 X 5 dataset is as follows:
335     *
336     * <pre>
337     *     0,  1,  2,  3,  4
338     *    10, 11, 12, 13, 14
339     *    20, 21, 22, 23, 24
340     *    30, 31, 32, 33, 34
341     * long[] dims = {4, 5};
342     * long[] startDims = {1, 2};
343     * long[] selectedDims = {3, 3};
344     * long[] selectedStride = {1, 1};
345     * then the following subset is selected by the startDims and selectedDims
346     *     12, 13, 14
347     *     22, 23, 24
348     *     32, 33, 34
349     * </pre>
350     *
351     * @return the starting position of a selected subset.
352     */
353    @Override
354    public final long[] getStartDims() {
355        if (!inited) init();
356
357        return startDims;
358    }
359
360    /**
361     * Returns the selectedStride of the selected dataset.
362     * <p>
363     * Applications can use this array to change how many elements to move in
364     * each dimension.
365     *
366     * Combined with the starting position and selected sizes, the subset of a
367     * rectangle selection is defined.
368     * <p>
369     * For example, if a 4 X 5 dataset is as follows:
370     *
371     * <pre>
372     *     0,  1,  2,  3,  4
373     *    10, 11, 12, 13, 14
374     *    20, 21, 22, 23, 24
375     *    30, 31, 32, 33, 34
376     * long[] dims = {4, 5};
377     * long[] startDims = {0, 0};
378     * long[] selectedDims = {2, 2};
379     * long[] selectedStride = {2, 3};
380     * then the following subset is selected by the startDims and selectedDims
381     *     0,   3
382     *     20, 23
383     * </pre>
384     *
385     * @return the selectedStride of the selected dataset.
386     */
387    @Override
388    public final long[] getStride() {
389        if (!inited) init();
390
391        if (rank <= 0) {
392            return null;
393        }
394
395        if (selectedStride == null) {
396            selectedStride = new long[rank];
397            for (int i = 0; i < rank; i++) {
398                selectedStride[i] = 1;
399            }
400        }
401
402        return selectedStride;
403    }
404
405    /**
406     * Sets the flag that indicates if a byte array is converted to a string
407     * array.
408     * <p>
409     * In a string dataset, the raw data from file is stored in a byte array. By
410     * default, this byte array is converted to an array of strings. For a large
411     * dataset (e.g. more than one million strings), the conversion takes a long
412     * time and requires a lot of memory space to store the strings. In some
413     * applications, such a conversion can be delayed. For example, A GUI
414     * application may convert only the part of the strings that is visible to the
415     * users, not the entire data array.
416     * <p>
417     * setConvertByteToString(boolean b) allows users to set the flag so that
418     * applications can choose to perform the byte-to-string conversion or not.
419     * If the flag is set to false, the getData() returns an array of byte
420     * instead of an array of strings.
421     *
422     * @param b
423     *            convert bytes to strings if b is true; otherwise, if false, do
424     *            not convert bytes to strings.
425     */
426    public final void setConvertByteToString(boolean b) {
427        convertByteToString = b;
428    }
429
430    /**
431     * Returns the flag that indicates if a byte array is converted to a string
432     * array.
433     *
434     * @return true if byte array is converted to string; otherwise, returns
435     *         false if there is no conversion.
436     */
437    public final boolean getConvertByteToString() {
438        return convertByteToString;
439    }
440
    /**
     * Reads the raw data of the dataset from file to a byte array.
     * <p>
     * readBytes() reads raw data into an array of bytes instead of an array of
     * its datatype. For example, for a one-dimensional 32-bit integer dataset
     * of size 5, readBytes() returns a byte array of size 20 instead of an
     * int array of 5.
     * <p>
     * readBytes() can be used to copy data from one dataset to another
     * efficiently: because the raw data is not converted to its native type,
     * it saves memory space and CPU time.
     *
     * @return the byte array of the raw data.
     *
     * @throws Exception if data cannot be read
     */
    public abstract byte[] readBytes() throws Exception;
458
459    /**
460     * Writes the memory buffer of this dataset to file.
461     *
462     * @throws Exception if buffer can not be written
463     */
464    @Override
465    public final void write() throws Exception {
466        if (data != null) {
467            write(data);
468        }
469    }
470
    /**
     * Creates a new dataset and writes the data buffer to the new dataset.
     * <p>
     * This function allows applications to create a new dataset for a given
     * data buffer. For example, users can select a specific interesting part
     * from a large image and create a new image with the selection.
     * <p>
     * The new dataset retains the datatype and dataset creation properties of
     * this dataset.
     *
     * @param pgroup
     *            the group which the dataset is copied to.
     * @param name
     *            the name of the new dataset.
     * @param dims
     *            the dimension sizes of the new dataset.
     * @param data
     *            the data values of the subset to be copied.
     *
     * @return the new dataset.
     *
     * @throws Exception if the dataset cannot be copied
     */
    public abstract Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception;
495
496    @Override
497    public final boolean isInited() {
498        return inited;
499    }
500
501    /**
502     * Returns the data buffer of the dataset in memory.
503     * <p>
504     * If data is already loaded into memory, returns the data; otherwise, calls
505     * read() to read data from file into a memory buffer and returns the memory
506     * buffer.
507     * <p>
508     * By default, the whole dataset is read into memory. Users can also select
509     * a subset to read. Subsetting is done in an implicit way.
510     * <p>
511     * <b>How to Select a Subset</b>
512     * <p>
513     * A selection is specified by three arrays: start, stride and count.
514     * <ol>
515     * <li>start: offset of a selection
516     * <li>stride: determines how many elements to move in each dimension
517     * <li>count: number of elements to select in each dimension
518     * </ol>
519     * getStartDims(), getStride() and getSelectedDims() returns the start,
520     * stride and count arrays respectively. Applications can make a selection
521     * by changing the values of the arrays.
522     * <p>
523     * The following example shows how to make a subset. In the example, the
524     * dataset is a 4-dimensional array of [200][100][50][10], i.e. dims[0]=200;
525     * dims[1]=100; dims[2]=50; dims[3]=10; <br>
526     * We want to select every other data point in dims[1] and dims[2]
527     *
528     * <pre>
529     * int rank = dataset.getRank(); // number of dimensions of the dataset
530     * long[] dims = dataset.getDims(); // the dimension sizes of the dataset
     * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset
532     * long[] start = dataset.getStartDims(); // the offset of the selection
533     * long[] stride = dataset.getStride(); // the stride of the dataset
534     * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for display
535     *
536     * // select dim1 and dim2 as 2D data for display,and slice through dim0
     * selectedIndex[0] = 1;
     * selectedIndex[1] = 2;
     * selectedIndex[2] = 0;
540     *
541     * // reset the selection arrays
542     * for (int i = 0; i &lt; rank; i++) {
543     *     start[i] = 0;
544     *     selected[i] = 1;
545     *     stride[i] = 1;
546     * }
547     *
548     * // set stride to 2 on dim1 and dim2 so that every other data point is
549     * // selected.
550     * stride[1] = 2;
551     * stride[2] = 2;
552     *
553     * // set the selection size of dim1 and dim2
     * selected[1] = dims[1] / stride[1];
     * selected[2] = dims[2] / stride[2];
556     *
557     * // when dataset.getData() is called, the selection above will be used since
558     * // the dimension arrays are passed by reference. Changes of these arrays
559     * // outside the dataset object directly change the values of these array
560     * // in the dataset object.
561     * </pre>
562     * <p>
563     * For ScalarDS, the memory data buffer is a one-dimensional array of byte,
564     * short, int, float, double or String type based on the datatype of the
565     * dataset.
566     * <p>
567     * For CompoundDS, the memory data object is an java.util.List object. Each
568     * element of the list is a data array that corresponds to a compound field.
569     * <p>
570     * For example, if compound dataset "comp" has the following nested
571     * structure, and member datatypes
572     *
573     * <pre>
574     * comp --&gt; m01 (int)
575     * comp --&gt; m02 (float)
576     * comp --&gt; nest1 --&gt; m11 (char)
577     * comp --&gt; nest1 --&gt; m12 (String)
578     * comp --&gt; nest1 --&gt; nest2 --&gt; m21 (long)
579     * comp --&gt; nest1 --&gt; nest2 --&gt; m22 (double)
580     * </pre>
581     *
582     * getData() returns a list of six arrays: {int[], float[], char[],
583     * String[], long[] and double[]}.
584     *
585     * @return the memory buffer of the dataset.
586     *
587     * @throws Exception if object can not be read
588     * @throws OutOfMemoryError if memory is exhausted
589     */
590    @Override
591    public final Object getData() throws Exception, OutOfMemoryError {
592        log.trace("getData: start");
593        if (!isDataLoaded) {
594            log.trace("getData: read");
595            data = read(); // load the data
596            originalBuf = data;
597            isDataLoaded = true;
598            nPoints = 1;
599            log.trace("getData: selectedDims length={}",selectedDims.length);
600            for (int j = 0; j < selectedDims.length; j++) {
601                nPoints *= selectedDims[j];
602            }
603            log.trace("getData: read {}", nPoints);
604        }
605
606        log.trace("getData: finish");
607        return data;
608    }
609
610    /**
611     * Not for public use in the future.
612     * <p>
613     * setData() is not safe to use because it changes memory buffer
614     * of the dataset object. Dataset operations such as write/read
615     * will fail if the buffer type or size is changed.
616     *
617     * @param d  the object data -must be an array of Objects
618     */
619    @Override
620    public final void setData(Object d) {
621        if (!(this instanceof Attribute))
622            throw new UnsupportedOperationException("setData: unsupported for non-Attribute objects");
623
624        log.trace("setData");
625        data = d;
626    }
627
628    /**
629     * Clears the current data buffer in memory and forces the next read() to load
630     * the data from file.
631     * <p>
632     * The function read() loads data from file into memory only if the data is
633     * not read. If data is already in memory, read() just returns the memory
634     * buffer. Sometimes we want to force read() to re-read data from file. For
635     * example, when the selection is changed, we need to re-read the data.
636     *
637     * @see #getData()
638     * @see #read()
639     */
640    @Override
641    public void clearData() {
642        isDataLoaded = false;
643    }
644
645    /**
646     * Returns the dimension size of the vertical axis.
647     *
648     * <p>
649     * This function is used by GUI applications such as HDFView. GUI
650     * applications display a dataset in a 2D table or 2D image. The display
651     * order is specified by the index array of selectedIndex as follow:
652     * <dl>
653     * <dt>selectedIndex[0] -- height</dt>
654     * <dd>The vertical axis</dd>
655     * <dt>selectedIndex[1] -- width</dt>
656     * <dd>The horizontal axis</dd>
657     * <dt>selectedIndex[2] -- depth</dt>
658     * <dd>The depth axis is used for 3 or more dimensional datasets.</dd>
659     * </dl>
660     * Applications can use getSelectedIndex() to access and change the display
661     * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the
662     * following code will set the height=200 and width=50.
663     *
664     * <pre>
665     * int[] selectedIndex = dataset.getSelectedIndex();
666     * selectedIndex[0] = 0;
667     * selectedIndex[1] = 1;
668     * </pre>
669     *
670     * @see #getSelectedIndex()
671     * @see #getWidth()
672     *
673     * @return the size of dimension of the vertical axis.
674     */
675    @Override
676    public final long getHeight() {
677        if (!inited) init();
678
679        if ((selectedDims == null) || (selectedIndex == null)) {
680            return 0;
681        }
682
683        return selectedDims[selectedIndex[0]];
684    }
685
686    /**
687     * Returns the dimension size of the horizontal axis.
688     *
689     * <p>
690     * This function is used by GUI applications such as HDFView. GUI
691     * applications display a dataset in 2D Table or 2D Image. The display order is
692     * specified by the index array of selectedIndex as follow:
693     * <dl>
694     * <dt>selectedIndex[0] -- height</dt>
695     * <dd>The vertical axis</dd>
696     * <dt>selectedIndex[1] -- width</dt>
697     * <dd>The horizontal axis</dd>
698     * <dt>selectedIndex[2] -- depth</dt>
699     * <dd>The depth axis, which is used for 3 or more dimension datasets.</dd>
700     * </dl>
701     * Applications can use getSelectedIndex() to access and change the display
702     * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the
     * following code will set the height=200 and width=50.
704     *
705     * <pre>
706     * int[] selectedIndex = dataset.getSelectedIndex();
707     * selectedIndex[0] = 0;
708     * selectedIndex[1] = 1;
709     * </pre>
710     *
711     * @see #getSelectedIndex()
712     * @see #getHeight()
713     *
714     * @return the size of dimension of the horizontal axis.
715     */
716    @Override
717    public final long getWidth() {
718        if (!inited) init();
719
720        if ((selectedDims == null) || (selectedIndex == null)) {
721            return 0;
722        }
723
724        if ((selectedDims.length < 2) || (selectedIndex.length < 2)) {
725            return 1;
726        }
727
728        return selectedDims[selectedIndex[1]];
729    }
730
731    /**
732     * Returns the indices of display order.
733     * <p>
734     *
735     * selectedIndex[] is provided for two purposes:
736     * <OL>
737     * <LI>
738     * selectedIndex[] is used to indicate the order of dimensions for display.
739     * selectedIndex[0] is for the row, selectedIndex[1] is for the column and
740     * selectedIndex[2] for the depth.
741     * <p>
742     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
743     * then dim[1] is selected as row index, dim[2] is selected as column index
744     * and dim[3] is selected as depth index.
745     * <LI>
746     * selectedIndex[] is also used to select dimensions for display for
747     * datasets with three or more dimensions. We assume that applications such
748     * as HDFView can only display data values up to three dimensions (2D
749     * spreadsheet/image with a third dimension which the 2D spreadsheet/image
750     * is selected from). For datasets with more than three dimensions, we need
751     * selectedIndex[] to tell applications which three dimensions are chosen
752     * for display. <br>
753     * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3},
754     * then dim[1] is selected as row index, dim[2] is selected as column index
755     * and dim[3] is selected as depth index. dim[0] is not selected. Its
756     * location is fixed at 0 by default.
757     * </OL>
758     *
759     * @return the array of the indices of display order.
760     */
761    @Override
762    public final int[] getSelectedIndex() {
763        if (!inited) init();
764
765        return selectedIndex;
766    }
767
768    /**
769     * Returns the string representation of compression information.
770     * <p>
771     * For example,
772     * "SZIP: Pixels per block = 8: H5Z_FILTER_CONFIG_DECODE_ENABLED".
773     *
774     * @return the string representation of compression information.
775     */
776    @Override
777    public final String getCompression() {
778        if (!inited) init();
779
780        return compression.toString();
781    }
782
783    /**
784     * Returns the string representation of filter information.
785     *
786     * @return the string representation of filter information.
787     */
788    public final String getFilters() {
789        if (!inited) init();
790
791        return filters.toString();
792    }
793
794    /**
795     * Returns the string representation of storage layout information.
796     *
797     * @return the string representation of storage layout information.
798     */
799    public final String getStorageLayout() {
800        if (!inited) init();
801
802        return storageLayout.toString();
803    }
804
805    /**
806     * Returns the string representation of storage information.
807     *
808     * @return the string representation of storage information.
809     */
810    public final String getStorage() {
811        if (!inited) init();
812
813        return storage.toString();
814    }
815
816    /**
817     * Returns the array that contains the dimension sizes of the chunk of the
818     * dataset. Returns null if the dataset is not chunked.
819     *
820     * @return the array of chunk sizes or returns null if the dataset is not
821     *         chunked.
822     */
823    public final long[] getChunkSize() {
824        if (!inited) init();
825
826        return chunkSize;
827    }
828
829    @Override
830    public Datatype getDatatype() {
831        return datatype;
832    }
833
834    /**
     * @deprecated Not for public use in the future. <br>
     *             Use {@link #convertFromUnsignedC(Object, Object)} instead.
837     *
838     * @param dataIN  the object data
839     *
840     * @return the converted object
841     */
842    @Deprecated
843    public static Object convertFromUnsignedC(Object dataIN) {
844        return Dataset.convertFromUnsignedC(dataIN, null);
845    }
846
847    /**
848     * Converts one-dimension array of unsigned C-type integers to a new array
849     * of appropriate Java integer in memory.
850     * <p>
851     * Since Java does not support unsigned integer, values of unsigned C-type
852     * integers must be converted into its appropriate Java integer. Otherwise,
     * the data value will not be displayed correctly. For example, if an unsigned
854     * C byte, x = 200, is stored into an Java byte y, y will be -56 instead of
855     * the correct value of 200.
856     * <p>
     * Unsigned C integers are upgraded to Java integers according to the
858     * following table:
859     *  <table border=1>
860     * <caption><b>Mapping Unsigned C Integers to Java Integers</b></caption>
861     * <TR>
862     * <TD><B>Unsigned C Integer</B></TD>
863     * <TD><B>JAVA Intege</B>r</TD>
864     * </TR>
865     * <TR>
866     * <TD>unsigned byte</TD>
867     * <TD>signed short</TD>
868     * </TR>
869     * <TR>
870     * <TD>unsigned short</TD>
871     * <TD>signed int</TD>
872     * </TR>
873     * <TR>
874     * <TD>unsigned int</TD>
875     * <TD>signed long</TD>
876     * </TR>
877     * <TR>
878     * <TD>unsigned long</TD>
879     * <TD>signed long</TD>
880     * </TR>
881     * </TABLE>
882     * <strong>NOTE: this conversion cannot deal with unsigned 64-bit integers.
883     * Therefore, the values of unsigned 64-bit datasets may be wrong in Java
884     * applications</strong>.
885     * <p>
886     * If memory data of unsigned integers is converted by
887     * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
888     * the data back to unsigned C before data is written into file.
889     *
890     * @see #convertToUnsignedC(Object, Object)
891     *
892     * @param dataIN
893     *            the input 1D array of the unsigned C-type integers.
894     * @param dataOUT
895     *            the output converted (or upgraded) 1D array of Java integers.
896     *
897     * @return the upgraded 1D array of Java integers.
898     */
899    @SuppressWarnings("rawtypes")
900    public static Object convertFromUnsignedC(Object dataIN, Object dataOUT) {
901        log.trace("convertFromUnsignedC(): start");
902
903        if (dataIN == null) {
904            log.debug("convertFromUnsignedC(): data_in is null");
905            log.trace("convertFromUnsignedC(): finish");
906            return null;
907        }
908
909        Class dataClass = dataIN.getClass();
910        if (!dataClass.isArray()) {
911            log.debug("convertFromUnsignedC(): data_in not an array");
912            log.trace("convertFromUnsignedC(): finish");
913            return null;
914        }
915
916        if (dataOUT != null) {
917            Class dataClassOut = dataOUT.getClass();
918            if (!dataClassOut.isArray() || (Array.getLength(dataIN) != Array.getLength(dataOUT))) {
919                log.debug("convertFromUnsignedC(): data_out not an array or does not match data_in size");
920                dataOUT = null;
921            }
922        }
923
924        String cname = dataClass.getName();
925        char dname = cname.charAt(cname.lastIndexOf('[') + 1);
926        int size = Array.getLength(dataIN);
927        log.trace("convertFromUnsignedC(): cname={} dname={} size={}", cname, dname, size);
928
929        if (dname == 'B') {
930            log.debug("convertFromUnsignedC(): Java convert byte to short");
931            short[] sdata = null;
932            if (dataOUT == null) {
933                sdata = new short[size];
934            }
935            else {
936                sdata = (short[]) dataOUT;
937            }
938
939            byte[] bdata = (byte[]) dataIN;
940            for (int i = 0; i < size; i++) {
941                sdata[i] = (short) ((bdata[i] + 256) & 0xFF);
942            }
943
944            dataOUT = sdata;
945        }
946        else if (dname == 'S') {
947            log.debug("convertFromUnsignedC(): Java convert short to int");
948            int[] idata = null;
949            if (dataOUT == null) {
950                idata = new int[size];
951            }
952            else {
953                idata = (int[]) dataOUT;
954            }
955
956            short[] sdata = (short[]) dataIN;
957            for (int i = 0; i < size; i++) {
958                idata[i] = (sdata[i] + 65536) & 0xFFFF;
959            }
960
961            dataOUT = idata;
962        }
963        else if (dname == 'I') {
964            log.debug("convertFromUnsignedC(): Java convert int to long");
965            long[] ldata = null;
966            if (dataOUT == null) {
967                ldata = new long[size];
968            }
969            else {
970                ldata = (long[]) dataOUT;
971            }
972
973            int[] idata = (int[]) dataIN;
974            for (int i = 0; i < size; i++) {
975                ldata[i] = (idata[i] + 4294967296L) & 0xFFFFFFFFL;
976            }
977
978            dataOUT = ldata;
979        }
980        else {
981            dataOUT = dataIN;
982            log.debug("convertFromUnsignedC(): Java does not support unsigned long");
983        }
984
985        log.trace("convertFromUnsignedC(): finish");
986        return dataOUT;
987    }
988
989    /**
990     * @deprecated Not for public use in the future. <br>
991     *             Using {@link #convertToUnsignedC(Object, Object)}
992     *
993     * @param dataIN
994     *            the input 1D array of the unsigned C-type integers.
995     *
996     * @return the upgraded 1D array of Java integers.
997     */
998    @Deprecated
999    public static Object convertToUnsignedC(Object dataIN) {
1000        return Dataset.convertToUnsignedC(dataIN, null);
1001    }
1002
1003    /**
1004     * Converts the array of converted unsigned integers back to unsigned C-type
1005     * integer data in memory.
1006     * <p>
1007     * If memory data of unsigned integers is converted by
1008     * convertFromUnsignedC(), convertToUnsignedC() must be called to convert
1009     * the data back to unsigned C before data is written into file.
1010     *
1011     * @see #convertFromUnsignedC(Object, Object)
1012     *
1013     * @param dataIN
1014     *            the input array of the Java integer.
1015     * @param dataOUT
1016     *            the output array of the unsigned C-type integer.
1017     *
1018     * @return the converted data of unsigned C-type integer array.
1019     */
1020    @SuppressWarnings("rawtypes")
1021    public static Object convertToUnsignedC(Object dataIN, Object dataOUT) {
1022        log.trace("convertToUnsignedC(): start");
1023
1024        if (dataIN == null) {
1025            log.debug("convertToUnsignedC(): data_in is null");
1026            log.trace("convertToUnsignedC(): finish");
1027            return null;
1028        }
1029
1030        Class dataClass = dataIN.getClass();
1031        if (!dataClass.isArray()) {
1032            log.debug("convertToUnsignedC(): data_in not an array");
1033            log.trace("convertToUnsignedC(): finish");
1034            return null;
1035        }
1036
1037        if (dataOUT != null) {
1038            Class dataClassOut = dataOUT.getClass();
1039            if (!dataClassOut.isArray() || (Array.getLength(dataIN) != Array.getLength(dataOUT))) {
1040                log.debug("convertToUnsignedC(): data_out not an array or does not match data_in size");
1041                dataOUT = null;
1042            }
1043        }
1044
1045        String cname = dataClass.getName();
1046        char dname = cname.charAt(cname.lastIndexOf('[') + 1);
1047        int size = Array.getLength(dataIN);
1048        log.trace("convertToUnsignedC(): cname={} dname={} size={}", cname, dname, size);
1049
1050        if (dname == 'S') {
1051            log.debug("convertToUnsignedC(): Java convert short to byte");
1052            byte[] bdata = null;
1053            if (dataOUT == null) {
1054                bdata = new byte[size];
1055            }
1056            else {
1057                bdata = (byte[]) dataOUT;
1058            }
1059            short[] sdata = (short[]) dataIN;
1060            for (int i = 0; i < size; i++) {
1061                bdata[i] = (byte) sdata[i];
1062            }
1063            dataOUT = bdata;
1064        }
1065        else if (dname == 'I') {
1066            log.debug("convertToUnsignedC(): Java convert int to short");
1067            short[] sdata = null;
1068            if (dataOUT == null) {
1069                sdata = new short[size];
1070            }
1071            else {
1072                sdata = (short[]) dataOUT;
1073            }
1074            int[] idata = (int[]) dataIN;
1075            for (int i = 0; i < size; i++) {
1076                sdata[i] = (short) idata[i];
1077            }
1078            dataOUT = sdata;
1079        }
1080        else if (dname == 'J') {
1081            log.debug("convertToUnsignedC(): Java convert long to int");
1082            int[] idata = null;
1083            if (dataOUT == null) {
1084                idata = new int[size];
1085            }
1086            else {
1087                idata = (int[]) dataOUT;
1088            }
1089            long[] ldata = (long[]) dataIN;
1090            for (int i = 0; i < size; i++) {
1091                idata[i] = (int) ldata[i];
1092            }
1093            dataOUT = idata;
1094        }
1095        else {
1096            dataOUT = dataIN;
1097            log.debug("convertToUnsignedC(): Java does not support unsigned long");
1098        }
1099
1100        log.trace("convertToUnsignedC(): finish");
1101        return dataOUT;
1102    }
1103
1104    /**
1105     * Converts an array of bytes into an array of Strings for a fixed string
1106     * dataset.
1107     * <p>
1108     * A C-string is an array of chars while an Java String is an object. When a
1109     * string dataset is read into a Java application, the data is stored in an
1110     * array of Java bytes. byteToString() is used to convert the array of bytes
1111     * into an array of Java strings so that applications can display and modify
1112     * the data content.
1113     * <p>
1114     * For example, the content of a two element C string dataset is {"ABC",
1115     * "abc"}. Java applications will read the data into a byte array of {65,
1116     * 66, 67, 97, 98, 99). byteToString(bytes, 3) returns an array of Java
1117     * String of strs[0]="ABC", and strs[1]="abc".
1118     * <p>
1119     * If memory data of strings is converted to Java Strings, stringToByte()
1120     * must be called to convert the memory data back to byte array before data
1121     * is written to file.
1122     *
1123     * @see #stringToByte(String[], int)
1124     *
1125     * @param bytes
1126     *            the array of bytes to convert.
1127     * @param length
1128     *            the length of string.
1129     *
1130     * @return the array of Java String.
1131     */
1132    public static final String[] byteToString(byte[] bytes, int length) {
1133        log.trace("byteToString(): start");
1134
1135        if (bytes == null) {
1136            log.debug("byteToString(): input is null");
1137            log.trace("byteToString(): finish");
1138            return null;
1139        }
1140
1141        int n = bytes.length / length;
1142        log.trace("byteToString(): n={} from length of {}", n, length);
1143        String[] strArray = new String[n];
1144        String str = null;
1145        int idx = 0;
1146        for (int i = 0; i < n; i++) {
1147            str = new String(bytes, i * length, length);
1148            idx = str.indexOf('\0');
1149            if (idx >= 0) {
1150                str = str.substring(0, idx);
1151            }
1152
1153            // trim only the end
1154            int end = str.length();
1155            while (end > 0 && str.charAt(end - 1) <= '\u0020')
1156                end--;
1157
1158            strArray[i] = (end <= 0) ? "" : str.substring(0, end);
1159        }
1160
1161        log.trace("byteToString(): finish");
1162        return strArray;
1163    }
1164
1165    /**
1166     * Converts a string array into an array of bytes for a fixed string
1167     * dataset.
1168     * <p>
1169     * If memory data of strings is converted to Java Strings, stringToByte()
1170     * must be called to convert the memory data back to byte array before data
1171     * is written to file.
1172     *
1173     * @see #byteToString(byte[] bytes, int length)
1174     *
1175     * @param strings
1176     *            the array of string.
1177     * @param length
1178     *            the length of string.
1179     *
1180     * @return the array of bytes.
1181     */
1182    public static final byte[] stringToByte(String[] strings, int length) {
1183        log.trace("stringToByte(): start");
1184
1185        if (strings == null) {
1186            log.debug("stringToByte(): input is null");
1187            log.trace("stringToByte(): finish");
1188            return null;
1189        }
1190
1191        int size = strings.length;
1192        byte[] bytes = new byte[size * length];
1193        log.trace("stringToByte(): size={} length={}", size, length);
1194        StringBuilder strBuff = new StringBuilder(length);
1195        for (int i = 0; i < size; i++) {
1196            // initialize the string with spaces
1197            strBuff.replace(0, length, " ");
1198
1199            if (strings[i] != null) {
1200                if (strings[i].length() > length) {
1201                    strings[i] = strings[i].substring(0, length);
1202                }
1203                strBuff.replace(0, length, strings[i]);
1204            }
1205
1206            strBuff.setLength(length);
1207            System.arraycopy(strBuff.toString().getBytes(), 0, bytes, length * i, length);
1208        }
1209
1210        log.trace("stringToByte(): finish");
1211
1212        return bytes;
1213    }
1214
1215    /**
1216     * Returns the array of strings that represent the dimension names. Returns
1217     * null if there is no dimension name.
1218     * <p>
1219     * Some datasets have pre-defined names for each dimension such as
1220     * "Latitude" and "Longitude". getDimNames() returns these pre-defined
1221     * names.
1222     *
1223     * @return the names of dimensions, or null if there is no dimension name.
1224     */
1225    public final String[] getDimNames() {
1226        if (!inited) init();
1227
1228        return dimNames;
1229    }
1230
1231    /**
1232     * Checks if a given datatype is a string. Sub-classes must replace this
1233     * default implementation.
1234     *
1235     * @param tid
1236     *            The data type identifier.
1237     *
1238     * @return true if the datatype is a string; otherwise returns false.
1239     */
1240    public boolean isString(long tid) {
1241        return false;
1242    }
1243
1244    /**
1245     * Returns the size in bytes of a given datatype. Sub-classes must replace
1246     * this default implementation.
1247     *
1248     * @param tid
1249     *            The data type identifier.
1250     *
1251     * @return The size of the datatype
1252     */
1253    public long getSize(long tid) {
1254        return -1;
1255    }
1256
1257    /**
1258     * Get Class of the original data buffer if converted.
1259     *
1260     * @return the Class of originalBuf
1261     */
1262    @Override
1263    @SuppressWarnings("rawtypes")
1264    public final Class getOriginalClass() {
1265        return originalBuf.getClass();
1266    }
1267
1268    /*
1269     * Checks if dataset is virtual. Sub-classes must replace
1270     * this default implementation.
1271     *
1272     * @return true if the dataset is virtual; otherwise returns false.
1273     */
1274    public boolean isVirtual() {
1275        return false;
1276    }
1277
1278    /*
1279     * Gets the source file name at index if dataset is virtual. Sub-classes must replace
1280     * this default implementation.
1281     *
1282     * @return filename if the dataset is virtual; otherwise returns null.
1283     */
1284    public String getVirtualFilename(int index) {
1285        return null;
1286    }
1287
1288    /*
1289     * Gets the number of source files if dataset is virtual. Sub-classes must replace
1290     * this default implementation.
1291     *
1292     * @return the list size if the dataset is virtual; otherwise returns negative.
1293     */
1294    public int getVirtualMaps() {
1295        return -1;
1296    }
1297}