001/***************************************************************************** 002 * Copyright by The HDF Group. * 003 * Copyright by the Board of Trustees of the University of Illinois. * 004 * All rights reserved. * 005 * * 006 * This file is part of the HDF Java Products distribution. * 007 * The full copyright notice, including terms governing use, modification, * 008 * and redistribution, is contained in the files COPYING and Copyright.html. * 009 * COPYING can be found at the root of the source code distribution tree. * 010 * Or, see https://support.hdfgroup.org/products/licenses.html * 011 * If you do not have access to either file, you may request a copy from * 012 * help@hdfgroup.org. * 013 ****************************************************************************/ 014 015package hdf.object; 016 017import java.lang.reflect.Array; 018import java.util.List; 019 020/** 021 * The abstract class provides general APIs to create and manipulate dataset 022 * objects, and retrieve dataset properties, datatype and dimension sizes. 023 * <p> 024 * This class provides two convenient functions, read()/write(), to read/write 025 * data values. Reading/writing data may take many library calls if we use the 026 * library APIs directly. The read() and write functions hide all the details of 027 * these calls from users. 028 * <p> 029 * For more details on dataset, 030 * see <b> <a href="https://support.hdfgroup.org/HDF5/doc/UG/HDF5_Users_Guide-Responsive%20HTML5/index.html">HDF5 User's Guide</a> </b> 031 * <p> 032 * 033 * @see hdf.object.ScalarDS 034 * @see hdf.object.CompoundDS 035 * 036 * @version 1.1 9/4/2007 037 * @author Peter X. 
Cao 038 */ 039public abstract class Dataset extends HObject implements MetaDataContainer, DataFormat { 040 private static final long serialVersionUID = -3360885430038261178L; 041 042 private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(Dataset.class); 043 044 /** 045 * The memory buffer that holds the raw data array of the dataset. 046 */ 047 protected transient Object data; 048 049 /** 050 * The number of dimensions of the dataset. 051 */ 052 protected int rank; 053 054 /** 055 * The current dimension sizes of the dataset 056 */ 057 protected long[] dims; 058 059 /** 060 * The max dimension sizes of the dataset 061 */ 062 protected long[] maxDims; 063 064 /** 065 * Array that contains the number of data points selected (for read/write) 066 * in each dimension. 067 * <p> 068 * The selected size must be less than or equal to the current dimension size. 069 * A subset of a rectangle selection is defined by the starting position and 070 * selected sizes. 071 * <p> 072 * For example, if a 4 X 5 dataset is as follows: 073 * 074 * <pre> 075 * 0, 1, 2, 3, 4 076 * 10, 11, 12, 13, 14 077 * 20, 21, 22, 23, 24 078 * 30, 31, 32, 33, 34 079 * long[] dims = {4, 5}; 080 * long[] startDims = {1, 2}; 081 * long[] selectedDims = {3, 3}; 082 * then the following subset is selected by the startDims and selectedDims above: 083 * 12, 13, 14 084 * 22, 23, 24 085 * 32, 33, 34 086 * </pre> 087 */ 088 protected long[] selectedDims; 089 090 /** 091 * The starting position of each dimension of a selected subset. With both 092 * the starting position and selected sizes, the subset of a rectangle 093 * selection is fully defined. 094 */ 095 protected long[] startDims; 096 097 /** 098 * Array that contains the indices of the dimensions selected for display. 099 * <p> 100 * <B>selectedIndex[] is provided for two purposes:</B> 101 * <OL> 102 * <LI> 103 * selectedIndex[] is used to indicate the order of dimensions for display, 104 * i.e. 
selectedIndex[0] = row, selectedIndex[1] = column and 105 * selectedIndex[2] = depth. For example, for a four dimension dataset, if 106 * selectedIndex[] is {1, 2, 3}, then dim[1] is selected as row index, 107 * dim[2] is selected as column index and dim[3] is selected as depth index. 108 * <LI> 109 * selectedIndex[] is also used to select dimensions for display for 110 * datasets with three or more dimensions. We assume that applications such 111 * as HDFView can only display data up to three dimensions (a 2D 112 * spreadsheet/image with a third dimension that the 2D spreadsheet/image is 113 * cut from). For datasets with more than three dimensions, we need 114 * selectedIndex[] to store which three dimensions are chosen for display. 115 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 116 * then dim[1] is selected as row index, dim[2] is selected as column index 117 * and dim[3] is selected as depth index. dim[0] is not selected. Its 118 * location is fixed at 0 by default. 119 * </OL> 120 */ 121 protected final int[] selectedIndex; 122 123 /** 124 * The number of elements to move from the start location in each dimension. 125 * For example, if selectedStride[0] = 2, every other data point is selected 126 * along dim[0]. 127 */ 128 protected long[] selectedStride; 129 130 /** 131 * The array of dimension sizes for a chunk. 132 */ 133 protected long[] chunkSize; 134 135 /** The compression information. */ 136 protected StringBuilder compression; 137 public static final String COMPRESSION_GZIP_TXT = "GZIP: level = "; 138 139 /** The filters information. */ 140 protected StringBuilder filters; 141 142 /** The storage layout information. */ 143 protected StringBuilder storageLayout; 144 145 /** The storage information. */ 146 protected StringBuilder storage; 147 148 /** The datatype object of the dataset. */ 149 protected Datatype datatype; 150 151 /** 152 * Array of strings that represent the dimension names. 
It is null if dimension names do not exist. 153 */ 154 protected String[] dimNames; 155 156 /** Flag to indicate if the byte[] array is converted to strings */ 157 protected boolean convertByteToString = true; 158 159 /** Flag to indicate if data values are loaded into memory. */ 160 protected boolean isDataLoaded = false; 161 162 /** Flag to indicate if this dataset has been initialized */ 163 protected boolean inited = false; 164 165 /** The number of data points in the memory buffer. */ 166 protected long nPoints = 1; 167 168 /** 169 * The data buffer that contains the raw data read directly from file 170 * (before any data conversion). 171 */ 172 protected transient Object originalBuf = null; 173 174 /** 175 * The array that holds the converted data of unsigned C-type integers. 176 * <p> 177 * For example, suppose that the original data is an array of unsigned 178 * 16-bit short integers. Since Java does not support unsigned integers, the 179 * data is converted to an array of 32-bit signed integers. In that case, the 180 * converted buffer is the array of 32-bit signed integers. 181 */ 182 protected transient Object convertedBuf = null; 183 184 /** 185 * Constructs a Dataset object with a given file, name and path. 186 * <p> 187 * Delegates to the four-argument constructor with a null oid. 188 * 189 * @param theFile 190 * the file that contains the dataset. 191 * @param dsName 192 * the name of the Dataset, e.g. "dset1". 193 * @param dsPath 194 * the full group path of this Dataset, e.g. "/arrays/". 195 */ 194 public Dataset(FileFormat theFile, String dsName, String dsPath) { 195 this(theFile, dsName, dsPath, null); 196 } 197 198 /** 199 * @deprecated Not for public use in the future. <br> 200 * Use {@link #Dataset(FileFormat, String, String)} 201 * 202 * @param theFile 203 * the file that contains the dataset. 204 * @param dsName 205 * the name of the Dataset, e.g. "dset1". 206 * @param dsPath 207 * the full group path of this Dataset, e.g. "/arrays/". 208 * @param oid 209 * the oid of this Dataset. 
210 */ 211 @Deprecated 212 public Dataset(FileFormat theFile, String dsName, String dsPath, long[] oid) { 213 super(theFile, dsName, dsPath, oid); 214 215 datatype = null; 216 rank = -1; 217 data = null; 218 dims = null; 219 maxDims = null; 220 selectedDims = null; 221 startDims = null; 222 selectedStride = null; 223 chunkSize = null; 224 compression = new StringBuilder("NONE"); 225 filters = new StringBuilder("NONE"); 226 storageLayout = new StringBuilder("NONE"); 227 storage = new StringBuilder("NONE"); 228 dimNames = null; 229 230 selectedIndex = new int[3]; 231 selectedIndex[0] = 0; 232 selectedIndex[1] = 1; 233 selectedIndex[2] = 2; 234 } 235 236 /** 237 * Clears memory held by the dataset, such as the data buffer. 238 */ 239 @SuppressWarnings("rawtypes") 240 public void clear() { 241 if (data != null) { 242 if (data instanceof List) { 243 ((List) data).clear(); 244 } 245 data = null; 246 originalBuf = null; 247 convertedBuf = null; 248 } 249 isDataLoaded = false; 250 } 251 252 /** 253 * Returns the rank (number of dimensions) of the dataset. 254 * 255 * @return the number of dimensions of the dataset. 256 */ 257 @Override 258 public final int getRank() { 259 if (!inited) 260 init(); 261 262 return rank; 263 } 264 265 /** 266 * Returns the array that contains the dimension sizes of the dataset. 267 * 268 * @return the dimension sizes of the dataset. 269 */ 270 @Override 271 public final long[] getDims() { 272 if (!inited) 273 init(); 274 275 return dims; 276 } 277 278 /** 279 * Returns the array that contains the max dimension sizes of the dataset. 280 * 281 * @return the max dimension sizes of the dataset. 282 */ 283 public final long[] getMaxDims() { 284 if (!inited) init(); 285 286 if (maxDims == null) return dims; 287 288 return maxDims; 289 } 290 291 /** 292 * Returns the dimension sizes of the selected subset. 293 * <p> 294 * The SelectedDims is the number of data points of the selected subset. 
295 * Applications can use this array to change the size of selected subset. 296 * 297 * The selected size must be less than or equal to the current dimension size. 298 * Combined with the starting position, selected sizes and stride, the 299 * subset of a rectangle selection is fully defined. 300 * <p> 301 * For example, if a 4 X 5 dataset is as follows: 302 * 303 * <pre> 304 * 0, 1, 2, 3, 4 305 * 10, 11, 12, 13, 14 306 * 20, 21, 22, 23, 24 307 * 30, 31, 32, 33, 34 308 * long[] dims = {4, 5}; 309 * long[] startDims = {1, 2}; 310 * long[] selectedDims = {3, 3}; 311 * long[] selectedStride = {1, 1}; 312 * then the following subset is selected by the startDims and selectedDims 313 * 12, 13, 14 314 * 22, 23, 24 315 * 32, 33, 34 316 * </pre> 317 * 318 * @return the dimension sizes of the selected subset. 319 */ 320 @Override 321 public final long[] getSelectedDims() { 322 if (!inited) init(); 323 324 return selectedDims; 325 } 326 327 /** 328 * Returns the starting position of a selected subset. 329 * <p> 330 * Applications can use this array to change the starting position of a 331 * selection. Combined with the selected dimensions, selected sizes and 332 * stride, the subset of a rectangle selection is fully defined. 333 * <p> 334 * For example, if a 4 X 5 dataset is as follows: 335 * 336 * <pre> 337 * 0, 1, 2, 3, 4 338 * 10, 11, 12, 13, 14 339 * 20, 21, 22, 23, 24 340 * 30, 31, 32, 33, 34 341 * long[] dims = {4, 5}; 342 * long[] startDims = {1, 2}; 343 * long[] selectedDims = {3, 3}; 344 * long[] selectedStride = {1, 1}; 345 * then the following subset is selected by the startDims and selectedDims 346 * 12, 13, 14 347 * 22, 23, 24 348 * 32, 33, 34 349 * </pre> 350 * 351 * @return the starting position of a selected subset. 352 */ 353 @Override 354 public final long[] getStartDims() { 355 if (!inited) init(); 356 357 return startDims; 358 } 359 360 /** 361 * Returns the selectedStride of the selected dataset. 
362 * <p> 363 * Applications can use this array to change how many elements to move in 364 * each dimension. 365 * 366 * Combined with the starting position and selected sizes, the subset of a 367 * rectangle selection is defined. 368 * <p> 369 * For example, if a 4 X 5 dataset is as follows: 370 * 371 * <pre> 372 * 0, 1, 2, 3, 4 373 * 10, 11, 12, 13, 14 374 * 20, 21, 22, 23, 24 375 * 30, 31, 32, 33, 34 376 * long[] dims = {4, 5}; 377 * long[] startDims = {0, 0}; 378 * long[] selectedDims = {2, 2}; 379 * long[] selectedStride = {2, 3}; 380 * then the following subset is selected by the startDims and selectedDims 381 * 0, 3 382 * 20, 23 383 * </pre> 384 * 385 * @return the selectedStride of the selected dataset. 386 */ 387 @Override 388 public final long[] getStride() { 389 if (!inited) init(); 390 391 if (rank <= 0) { 392 return null; 393 } 394 395 if (selectedStride == null) { 396 selectedStride = new long[rank]; 397 for (int i = 0; i < rank; i++) { 398 selectedStride[i] = 1; 399 } 400 } 401 402 return selectedStride; 403 } 404 405 /** 406 * Sets the flag that indicates if a byte array is converted to a string 407 * array. 408 * <p> 409 * In a string dataset, the raw data from file is stored in a byte array. By 410 * default, this byte array is converted to an array of strings. For a large 411 * dataset (e.g. more than one million strings), the conversion takes a long 412 * time and requires a lot of memory space to store the strings. In some 413 * applications, such a conversion can be delayed. For example, A GUI 414 * application may convert only the part of the strings that is visible to the 415 * users, not the entire data array. 416 * <p> 417 * setConvertByteToString(boolean b) allows users to set the flag so that 418 * applications can choose to perform the byte-to-string conversion or not. 419 * If the flag is set to false, the getData() returns an array of byte 420 * instead of an array of strings. 
421 * 422 * @param b 423 * convert bytes to strings if b is true; otherwise, if false, do 424 * not convert bytes to strings. 425 */ 426 public final void setConvertByteToString(boolean b) { 427 convertByteToString = b; 428 } 429 430 /** 431 * Returns the flag that indicates if a byte array is converted to a string 432 * array. 433 * 434 * @return true if byte array is converted to string; otherwise, returns 435 * false if there is no conversion. 436 */ 437 public final boolean getConvertByteToString() { 438 return convertByteToString; 439 } 440 441 /** 442 * Reads the raw data of the dataset from file to a byte array. 443 * <p> 444 * readBytes() reads raw data to an array of bytes instead of array of its 445 * datatype. For example, for a one-dimension 32-bit integer dataset of 446 * size 5, readBytes() returns a byte array of size 20 instead of an 447 * int array of 5. 448 * <p> 449 * readBytes() can be used to copy data from one dataset to another 450 * efficiently because the raw data is not converted to its native type, it 451 * saves memory space and CPU time. 452 * 453 * @return the byte array of the raw data. 454 * 455 * @throws Exception if data can not be read 456 */ 457 public abstract byte[] readBytes() throws Exception; 458 459 /** 460 * Writes the memory buffer of this dataset to file. 461 * 462 * @throws Exception if buffer can not be written 463 */ 464 @Override 465 public final void write() throws Exception { 466 if (data != null) { 467 write(data); 468 } 469 } 470 471 /** 472 * Creates a new dataset and writes the data buffer to the new dataset. 473 * <p> 474 * This function allows applications to create a new dataset for a given 475 * data buffer. For example, users can select a specific interesting part 476 * from a large image and create a new image with the selection. 477 * <p> 478 * The new dataset retains the datatype and dataset creation properties of 479 * this dataset. 
480 * 481 * @param pgroup 482 * the group which the dataset is copied to. 483 * @param name 484 * the name of the new dataset. 485 * @param dims 486 * the dimension sizes of the the new dataset. 487 * @param data 488 * the data values of the subset to be copied. 489 * 490 * @return the new dataset. 491 * 492 * @throws Exception if dataset can not be copied 493 */ 494 public abstract Dataset copy(Group pgroup, String name, long[] dims, Object data) throws Exception; 495 /** Returns the value of the inited flag, i.e. whether this dataset has been initialized. */ 496 @Override 497 public final boolean isInited() { 498 return inited; 499 } 500 501 /** 502 * Returns the data buffer of the dataset in memory. 503 * <p> 504 * If data is already loaded into memory, returns the data; otherwise, calls 505 * read() to read data from file into a memory buffer and returns the memory 506 * buffer. 507 * <p> 508 * By default, the whole dataset is read into memory. Users can also select 509 * a subset to read. Subsetting is done in an implicit way. 510 * <p> 511 * <b>How to Select a Subset</b> 512 * <p> 513 * A selection is specified by three arrays: start, stride and count. 514 * <ol> 515 * <li>start: offset of a selection 516 * <li>stride: determines how many elements to move in each dimension 517 * <li>count: number of elements to select in each dimension 518 * </ol> 519 * getStartDims(), getStride() and getSelectedDims() returns the start, 520 * stride and count arrays respectively. Applications can make a selection 521 * by changing the values of the arrays. 522 * <p> 523 * The following example shows how to make a subset. In the example, the 524 * dataset is a 4-dimensional array of [200][100][50][10], i.e. 
dims[0]=200; 525 * dims[1]=100; dims[2]=50; dims[3]=10; <br> 526 * We want to select every other data point in dims[1] and dims[2] 527 * 528 * <pre> 529 * int rank = dataset.getRank(); // number of dimensions of the dataset 530 * long[] dims = dataset.getDims(); // the dimension sizes of the dataset 531 * long[] selected = dataset.getSelectedDims(); // the selected size of the dataset 532 * long[] start = dataset.getStartDims(); // the offset of the selection 533 * long[] stride = dataset.getStride(); // the stride of the dataset 534 * int[] selectedIndex = dataset.getSelectedIndex(); // the selected dimensions for display 535 * 536 * // select dim1 and dim2 as 2D data for display, and slice through dim0 537 * selectedIndex[0] = 1; 538 * selectedIndex[1] = 2; 539 * selectedIndex[2] = 0; 540 * 541 * // reset the selection arrays 542 * for (int i = 0; i < rank; i++) { 543 * start[i] = 0; 544 * selected[i] = 1; 545 * stride[i] = 1; 546 * } 547 * 548 * // set stride to 2 on dim1 and dim2 so that every other data point is 549 * // selected. 550 * stride[1] = 2; 551 * stride[2] = 2; 552 * 553 * // set the selection size of dim1 and dim2 554 * selected[1] = dims[1] / stride[1]; 555 * selected[2] = dims[2] / stride[2]; 556 * 557 * // when dataset.getData() is called, the selection above will be used since 558 * // the dimension arrays are passed by reference. Changes of these arrays 559 * // outside the dataset object directly change the values of these array 560 * // in the dataset object. 561 * </pre> 562 * <p> 563 * For ScalarDS, the memory data buffer is a one-dimensional array of byte, 564 * short, int, float, double or String type based on the datatype of the 565 * dataset. 566 * <p> 567 * For CompoundDS, the memory data object is a java.util.List object. Each 568 * element of the list is a data array that corresponds to a compound field. 
569 * <p> 570 * For example, if compound dataset "comp" has the following nested 571 * structure, and member datatypes 572 * 573 * <pre> 574 * comp --> m01 (int) 575 * comp --> m02 (float) 576 * comp --> nest1 --> m11 (char) 577 * comp --> nest1 --> m12 (String) 578 * comp --> nest1 --> nest2 --> m21 (long) 579 * comp --> nest1 --> nest2 --> m22 (double) 580 * </pre> 581 * 582 * getData() returns a list of six arrays: {int[], float[], char[], 583 * String[], long[] and double[]}. 584 * 585 * @return the memory buffer of the dataset. 586 * 587 * @throws Exception if object can not be read 588 * @throws OutOfMemoryError if memory is exhausted 589 */ 590 @Override 591 public final Object getData() throws Exception, OutOfMemoryError { 592 log.trace("getData: start"); 593 if (!isDataLoaded) { 594 log.trace("getData: read"); 595 data = read(); // load the data 596 originalBuf = data; 597 isDataLoaded = true; 598 nPoints = 1; 599 log.trace("getData: selectedDims length={}",selectedDims.length); 600 for (int j = 0; j < selectedDims.length; j++) { 601 nPoints *= selectedDims[j]; 602 } 603 log.trace("getData: read {}", nPoints); 604 } 605 606 log.trace("getData: finish"); 607 return data; 608 } 609 610 /** 611 * Not for public use in the future. 612 * <p> 613 * setData() is not safe to use because it changes memory buffer 614 * of the dataset object. Dataset operations such as write/read 615 * will fail if the buffer type or size is changed. 616 * 617 * @param d the object data -must be an array of Objects 618 */ 619 @Override 620 public final void setData(Object d) { 621 if (!(this instanceof Attribute)) 622 throw new UnsupportedOperationException("setData: unsupported for non-Attribute objects"); 623 624 log.trace("setData"); 625 data = d; 626 } 627 628 /** 629 * Clears the current data buffer in memory and forces the next read() to load 630 * the data from file. 631 * <p> 632 * The function read() loads data from file into memory only if the data is 633 * not read. 
If data is already in memory, read() just returns the memory 634 * buffer. Sometimes we want to force read() to re-read data from file. For 635 * example, when the selection is changed, we need to re-read the data. 636 * 637 * @see #getData() 638 * @see #read() 639 */ 640 @Override 641 public void clearData() { 642 isDataLoaded = false; 643 } 644 645 /** 646 * Returns the dimension size of the vertical axis. 647 * 648 * <p> 649 * This function is used by GUI applications such as HDFView. GUI 650 * applications display a dataset in a 2D table or 2D image. The display 651 * order is specified by the index array of selectedIndex as follow: 652 * <dl> 653 * <dt>selectedIndex[0] -- height</dt> 654 * <dd>The vertical axis</dd> 655 * <dt>selectedIndex[1] -- width</dt> 656 * <dd>The horizontal axis</dd> 657 * <dt>selectedIndex[2] -- depth</dt> 658 * <dd>The depth axis is used for 3 or more dimensional datasets.</dd> 659 * </dl> 660 * Applications can use getSelectedIndex() to access and change the display 661 * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the 662 * following code will set the height=200 and width=50. 663 * 664 * <pre> 665 * int[] selectedIndex = dataset.getSelectedIndex(); 666 * selectedIndex[0] = 0; 667 * selectedIndex[1] = 1; 668 * </pre> 669 * 670 * @see #getSelectedIndex() 671 * @see #getWidth() 672 * 673 * @return the size of dimension of the vertical axis. 674 */ 675 @Override 676 public final long getHeight() { 677 if (!inited) init(); 678 679 if ((selectedDims == null) || (selectedIndex == null)) { 680 return 0; 681 } 682 683 return selectedDims[selectedIndex[0]]; 684 } 685 686 /** 687 * Returns the dimension size of the horizontal axis. 688 * 689 * <p> 690 * This function is used by GUI applications such as HDFView. GUI 691 * applications display a dataset in 2D Table or 2D Image. 
The display order is 692 * specified by the index array of selectedIndex as follow: 693 * <dl> 694 * <dt>selectedIndex[0] -- height</dt> 695 * <dd>The vertical axis</dd> 696 * <dt>selectedIndex[1] -- width</dt> 697 * <dd>The horizontal axis</dd> 698 * <dt>selectedIndex[2] -- depth</dt> 699 * <dd>The depth axis, which is used for 3 or more dimension datasets.</dd> 700 * </dl> 701 * Applications can use getSelectedIndex() to access and change the display 702 * order. For example, in a 2D dataset of 200x50 (dim0=200, dim1=50), the 703 * following code will set the height=200 and width=100. 704 * 705 * <pre> 706 * int[] selectedIndex = dataset.getSelectedIndex(); 707 * selectedIndex[0] = 0; 708 * selectedIndex[1] = 1; 709 * </pre> 710 * 711 * @see #getSelectedIndex() 712 * @see #getHeight() 713 * 714 * @return the size of dimension of the horizontal axis. 715 */ 716 @Override 717 public final long getWidth() { 718 if (!inited) init(); 719 720 if ((selectedDims == null) || (selectedIndex == null)) { 721 return 0; 722 } 723 724 if ((selectedDims.length < 2) || (selectedIndex.length < 2)) { 725 return 1; 726 } 727 728 return selectedDims[selectedIndex[1]]; 729 } 730 731 /** 732 * Returns the indices of display order. 733 * <p> 734 * 735 * selectedIndex[] is provided for two purposes: 736 * <OL> 737 * <LI> 738 * selectedIndex[] is used to indicate the order of dimensions for display. 739 * selectedIndex[0] is for the row, selectedIndex[1] is for the column and 740 * selectedIndex[2] for the depth. 741 * <p> 742 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 743 * then dim[1] is selected as row index, dim[2] is selected as column index 744 * and dim[3] is selected as depth index. 745 * <LI> 746 * selectedIndex[] is also used to select dimensions for display for 747 * datasets with three or more dimensions. 
We assume that applications such 748 * as HDFView can only display data values up to three dimensions (2D 749 * spreadsheet/image with a third dimension which the 2D spreadsheet/image 750 * is selected from). For datasets with more than three dimensions, we need 751 * selectedIndex[] to tell applications which three dimensions are chosen 752 * for display. <br> 753 * For example, for a four dimension dataset, if selectedIndex[] = {1, 2, 3}, 754 * then dim[1] is selected as row index, dim[2] is selected as column index 755 * and dim[3] is selected as depth index. dim[0] is not selected. Its 756 * location is fixed at 0 by default. 757 * </OL> 758 * 759 * @return the array of the indices of display order. 760 */ 761 @Override 762 public final int[] getSelectedIndex() { 763 if (!inited) init(); 764 765 return selectedIndex; 766 } 767 768 /** 769 * Returns the string representation of compression information. 770 * <p> 771 * For example, 772 * "SZIP: Pixels per block = 8: H5Z_FILTER_CONFIG_DECODE_ENABLED". 773 * 774 * @return the string representation of compression information. 775 */ 776 @Override 777 public final String getCompression() { 778 if (!inited) init(); 779 780 return compression.toString(); 781 } 782 783 /** 784 * Returns the string representation of filter information. 785 * 786 * @return the string representation of filter information. 787 */ 788 public final String getFilters() { 789 if (!inited) init(); 790 791 return filters.toString(); 792 } 793 794 /** 795 * Returns the string representation of storage layout information. 796 * 797 * @return the string representation of storage layout information. 798 */ 799 public final String getStorageLayout() { 800 if (!inited) init(); 801 802 return storageLayout.toString(); 803 } 804 805 /** 806 * Returns the string representation of storage information. 807 * 808 * @return the string representation of storage information. 
809 */ 810 public final String getStorage() { 811 if (!inited) init(); 812 813 return storage.toString(); 814 } 815 816 /** 817 * Returns the array that contains the dimension sizes of the chunk of the 818 * dataset. Returns null if the dataset is not chunked. 819 * 820 * @return the array of chunk sizes or returns null if the dataset is not 821 * chunked. 822 */ 823 public final long[] getChunkSize() { 824 if (!inited) init(); 825 826 return chunkSize; 827 } 828 829 @Override 830 public Datatype getDatatype() { 831 return datatype; 832 } 833 834 /** 835 * @deprecated Not for public use in the future. <br> 836 * Using {@link #convertFromUnsignedC(Object, Object)} 837 * 838 * @param dataIN the object data 839 * 840 * @return the converted object 841 */ 842 @Deprecated 843 public static Object convertFromUnsignedC(Object dataIN) { 844 return Dataset.convertFromUnsignedC(dataIN, null); 845 } 846 847 /** 848 * Converts one-dimension array of unsigned C-type integers to a new array 849 * of appropriate Java integer in memory. 850 * <p> 851 * Since Java does not support unsigned integer, values of unsigned C-type 852 * integers must be converted into its appropriate Java integer. Otherwise, 853 * the data value will not displayed correctly. For example, if an unsigned 854 * C byte, x = 200, is stored into an Java byte y, y will be -56 instead of 855 * the correct value of 200. 
856 * <p> 857 * Unsigned C integers are upgraded to Java integers according to the 858 * following table: 859 * <table border=1> 860 * <caption><b>Mapping Unsigned C Integers to Java Integers</b></caption> 861 * <TR> 862 * <TD><B>Unsigned C Integer</B></TD> 863 * <TD><B>Java Integer</B></TD> 864 * </TR> 865 * <TR> 866 * <TD>unsigned byte</TD> 867 * <TD>signed short</TD> 868 * </TR> 869 * <TR> 870 * <TD>unsigned short</TD> 871 * <TD>signed int</TD> 872 * </TR> 873 * <TR> 874 * <TD>unsigned int</TD> 875 * <TD>signed long</TD> 876 * </TR> 877 * <TR> 878 * <TD>unsigned long</TD> 879 * <TD>signed long</TD> 880 * </TR> 881 * </TABLE> 882 * <strong>NOTE: this conversion cannot deal with unsigned 64-bit integers. 883 * Therefore, the values of unsigned 64-bit datasets may be wrong in Java 884 * applications</strong>. 885 * <p> 886 * If memory data of unsigned integers is converted by 887 * convertFromUnsignedC(), convertToUnsignedC() must be called to convert 888 * the data back to unsigned C before data is written into file. 889 * 890 * @see #convertToUnsignedC(Object, Object) 891 * 892 * @param dataIN 893 * the input 1D array of the unsigned C-type integers. 894 * @param dataOUT 895 * the output converted (or upgraded) 1D array of Java integers. 896 * 897 * @return the upgraded 1D array of Java integers. 
898 */ 899 @SuppressWarnings("rawtypes") 900 public static Object convertFromUnsignedC(Object dataIN, Object dataOUT) { 901 log.trace("convertFromUnsignedC(): start"); 902 903 if (dataIN == null) { 904 log.debug("convertFromUnsignedC(): data_in is null"); 905 log.trace("convertFromUnsignedC(): finish"); 906 return null; 907 } 908 909 Class dataClass = dataIN.getClass(); 910 if (!dataClass.isArray()) { 911 log.debug("convertFromUnsignedC(): data_in not an array"); 912 log.trace("convertFromUnsignedC(): finish"); 913 return null; 914 } 915 916 if (dataOUT != null) { 917 Class dataClassOut = dataOUT.getClass(); 918 if (!dataClassOut.isArray() || (Array.getLength(dataIN) != Array.getLength(dataOUT))) { 919 log.debug("convertFromUnsignedC(): data_out not an array or does not match data_in size"); 920 dataOUT = null; 921 } 922 } 923 924 String cname = dataClass.getName(); 925 char dname = cname.charAt(cname.lastIndexOf('[') + 1); 926 int size = Array.getLength(dataIN); 927 log.trace("convertFromUnsignedC(): cname={} dname={} size={}", cname, dname, size); 928 929 if (dname == 'B') { 930 log.debug("convertFromUnsignedC(): Java convert byte to short"); 931 short[] sdata = null; 932 if (dataOUT == null) { 933 sdata = new short[size]; 934 } 935 else { 936 sdata = (short[]) dataOUT; 937 } 938 939 byte[] bdata = (byte[]) dataIN; 940 for (int i = 0; i < size; i++) { 941 sdata[i] = (short) ((bdata[i] + 256) & 0xFF); 942 } 943 944 dataOUT = sdata; 945 } 946 else if (dname == 'S') { 947 log.debug("convertFromUnsignedC(): Java convert short to int"); 948 int[] idata = null; 949 if (dataOUT == null) { 950 idata = new int[size]; 951 } 952 else { 953 idata = (int[]) dataOUT; 954 } 955 956 short[] sdata = (short[]) dataIN; 957 for (int i = 0; i < size; i++) { 958 idata[i] = (sdata[i] + 65536) & 0xFFFF; 959 } 960 961 dataOUT = idata; 962 } 963 else if (dname == 'I') { 964 log.debug("convertFromUnsignedC(): Java convert int to long"); 965 long[] ldata = null; 966 if (dataOUT == null) { 
967 ldata = new long[size]; 968 } 969 else { 970 ldata = (long[]) dataOUT; 971 } 972 973 int[] idata = (int[]) dataIN; 974 for (int i = 0; i < size; i++) { 975 ldata[i] = (idata[i] + 4294967296L) & 0xFFFFFFFFL; 976 } 977 978 dataOUT = ldata; 979 } 980 else { 981 dataOUT = dataIN; 982 log.debug("convertFromUnsignedC(): Java does not support unsigned long"); 983 } 984 985 log.trace("convertFromUnsignedC(): finish"); 986 return dataOUT; 987 } 988 989 /** 990 * @deprecated Not for public use in the future. <br> 991 * Using {@link #convertToUnsignedC(Object, Object)} 992 * 993 * @param dataIN 994 * the input 1D array of the unsigned C-type integers. 995 * 996 * @return the upgraded 1D array of Java integers. 997 */ 998 @Deprecated 999 public static Object convertToUnsignedC(Object dataIN) { 1000 return Dataset.convertToUnsignedC(dataIN, null); 1001 } 1002 1003 /** 1004 * Converts the array of converted unsigned integers back to unsigned C-type 1005 * integer data in memory. 1006 * <p> 1007 * If memory data of unsigned integers is converted by 1008 * convertFromUnsignedC(), convertToUnsignedC() must be called to convert 1009 * the data back to unsigned C before data is written into file. 1010 * 1011 * @see #convertFromUnsignedC(Object, Object) 1012 * 1013 * @param dataIN 1014 * the input array of the Java integer. 1015 * @param dataOUT 1016 * the output array of the unsigned C-type integer. 1017 * 1018 * @return the converted data of unsigned C-type integer array. 
1019 */ 1020 @SuppressWarnings("rawtypes") 1021 public static Object convertToUnsignedC(Object dataIN, Object dataOUT) { 1022 log.trace("convertToUnsignedC(): start"); 1023 1024 if (dataIN == null) { 1025 log.debug("convertToUnsignedC(): data_in is null"); 1026 log.trace("convertToUnsignedC(): finish"); 1027 return null; 1028 } 1029 1030 Class dataClass = dataIN.getClass(); 1031 if (!dataClass.isArray()) { 1032 log.debug("convertToUnsignedC(): data_in not an array"); 1033 log.trace("convertToUnsignedC(): finish"); 1034 return null; 1035 } 1036 1037 if (dataOUT != null) { 1038 Class dataClassOut = dataOUT.getClass(); 1039 if (!dataClassOut.isArray() || (Array.getLength(dataIN) != Array.getLength(dataOUT))) { 1040 log.debug("convertToUnsignedC(): data_out not an array or does not match data_in size"); 1041 dataOUT = null; 1042 } 1043 } 1044 1045 String cname = dataClass.getName(); 1046 char dname = cname.charAt(cname.lastIndexOf('[') + 1); 1047 int size = Array.getLength(dataIN); 1048 log.trace("convertToUnsignedC(): cname={} dname={} size={}", cname, dname, size); 1049 1050 if (dname == 'S') { 1051 log.debug("convertToUnsignedC(): Java convert short to byte"); 1052 byte[] bdata = null; 1053 if (dataOUT == null) { 1054 bdata = new byte[size]; 1055 } 1056 else { 1057 bdata = (byte[]) dataOUT; 1058 } 1059 short[] sdata = (short[]) dataIN; 1060 for (int i = 0; i < size; i++) { 1061 bdata[i] = (byte) sdata[i]; 1062 } 1063 dataOUT = bdata; 1064 } 1065 else if (dname == 'I') { 1066 log.debug("convertToUnsignedC(): Java convert int to short"); 1067 short[] sdata = null; 1068 if (dataOUT == null) { 1069 sdata = new short[size]; 1070 } 1071 else { 1072 sdata = (short[]) dataOUT; 1073 } 1074 int[] idata = (int[]) dataIN; 1075 for (int i = 0; i < size; i++) { 1076 sdata[i] = (short) idata[i]; 1077 } 1078 dataOUT = sdata; 1079 } 1080 else if (dname == 'J') { 1081 log.debug("convertToUnsignedC(): Java convert long to int"); 1082 int[] idata = null; 1083 if (dataOUT == null) { 
1084 idata = new int[size]; 1085 } 1086 else { 1087 idata = (int[]) dataOUT; 1088 } 1089 long[] ldata = (long[]) dataIN; 1090 for (int i = 0; i < size; i++) { 1091 idata[i] = (int) ldata[i]; 1092 } 1093 dataOUT = idata; 1094 } 1095 else { 1096 dataOUT = dataIN; 1097 log.debug("convertToUnsignedC(): Java does not support unsigned long"); 1098 } 1099 1100 log.trace("convertToUnsignedC(): finish"); 1101 return dataOUT; 1102 } 1103 1104 /** 1105 * Converts an array of bytes into an array of Strings for a fixed string 1106 * dataset. 1107 * <p> 1108 * A C-string is an array of chars while an Java String is an object. When a 1109 * string dataset is read into a Java application, the data is stored in an 1110 * array of Java bytes. byteToString() is used to convert the array of bytes 1111 * into an array of Java strings so that applications can display and modify 1112 * the data content. 1113 * <p> 1114 * For example, the content of a two element C string dataset is {"ABC", 1115 * "abc"}. Java applications will read the data into a byte array of {65, 1116 * 66, 67, 97, 98, 99). byteToString(bytes, 3) returns an array of Java 1117 * String of strs[0]="ABC", and strs[1]="abc". 1118 * <p> 1119 * If memory data of strings is converted to Java Strings, stringToByte() 1120 * must be called to convert the memory data back to byte array before data 1121 * is written to file. 1122 * 1123 * @see #stringToByte(String[], int) 1124 * 1125 * @param bytes 1126 * the array of bytes to convert. 1127 * @param length 1128 * the length of string. 1129 * 1130 * @return the array of Java String. 
1131 */ 1132 public static final String[] byteToString(byte[] bytes, int length) { 1133 log.trace("byteToString(): start"); 1134 1135 if (bytes == null) { 1136 log.debug("byteToString(): input is null"); 1137 log.trace("byteToString(): finish"); 1138 return null; 1139 } 1140 1141 int n = bytes.length / length; 1142 log.trace("byteToString(): n={} from length of {}", n, length); 1143 String[] strArray = new String[n]; 1144 String str = null; 1145 int idx = 0; 1146 for (int i = 0; i < n; i++) { 1147 str = new String(bytes, i * length, length); 1148 idx = str.indexOf('\0'); 1149 if (idx >= 0) { 1150 str = str.substring(0, idx); 1151 } 1152 1153 // trim only the end 1154 int end = str.length(); 1155 while (end > 0 && str.charAt(end - 1) <= '\u0020') 1156 end--; 1157 1158 strArray[i] = (end <= 0) ? "" : str.substring(0, end); 1159 } 1160 1161 log.trace("byteToString(): finish"); 1162 return strArray; 1163 } 1164 1165 /** 1166 * Converts a string array into an array of bytes for a fixed string 1167 * dataset. 1168 * <p> 1169 * If memory data of strings is converted to Java Strings, stringToByte() 1170 * must be called to convert the memory data back to byte array before data 1171 * is written to file. 1172 * 1173 * @see #byteToString(byte[] bytes, int length) 1174 * 1175 * @param strings 1176 * the array of string. 1177 * @param length 1178 * the length of string. 1179 * 1180 * @return the array of bytes. 
1181 */ 1182 public static final byte[] stringToByte(String[] strings, int length) { 1183 log.trace("stringToByte(): start"); 1184 1185 if (strings == null) { 1186 log.debug("stringToByte(): input is null"); 1187 log.trace("stringToByte(): finish"); 1188 return null; 1189 } 1190 1191 int size = strings.length; 1192 byte[] bytes = new byte[size * length]; 1193 log.trace("stringToByte(): size={} length={}", size, length); 1194 StringBuilder strBuff = new StringBuilder(length); 1195 for (int i = 0; i < size; i++) { 1196 // initialize the string with spaces 1197 strBuff.replace(0, length, " "); 1198 1199 if (strings[i] != null) { 1200 if (strings[i].length() > length) { 1201 strings[i] = strings[i].substring(0, length); 1202 } 1203 strBuff.replace(0, length, strings[i]); 1204 } 1205 1206 strBuff.setLength(length); 1207 System.arraycopy(strBuff.toString().getBytes(), 0, bytes, length * i, length); 1208 } 1209 1210 log.trace("stringToByte(): finish"); 1211 1212 return bytes; 1213 } 1214 1215 /** 1216 * Returns the array of strings that represent the dimension names. Returns 1217 * null if there is no dimension name. 1218 * <p> 1219 * Some datasets have pre-defined names for each dimension such as 1220 * "Latitude" and "Longitude". getDimNames() returns these pre-defined 1221 * names. 1222 * 1223 * @return the names of dimensions, or null if there is no dimension name. 1224 */ 1225 public final String[] getDimNames() { 1226 if (!inited) init(); 1227 1228 return dimNames; 1229 } 1230 1231 /** 1232 * Checks if a given datatype is a string. Sub-classes must replace this 1233 * default implementation. 1234 * 1235 * @param tid 1236 * The data type identifier. 1237 * 1238 * @return true if the datatype is a string; otherwise returns false. 1239 */ 1240 public boolean isString(long tid) { 1241 return false; 1242 } 1243 1244 /** 1245 * Returns the size in bytes of a given datatype. Sub-classes must replace 1246 * this default implementation. 
*
     * @param tid
     *            The data type identifier.
     *
     * @return The size of the datatype
     */
    public long getSize(long tid) {
        return -1;
    }

    /**
     * Get Class of the original data buffer if converted.
     *
     * @return the Class of originalBuf, or null if there is no original
     *         buffer.
     */
    @Override
    @SuppressWarnings("rawtypes")
    public final Class getOriginalClass() {
        // Without an original buffer there is no class to report; return null
        // rather than failing with a NullPointerException.
        if (originalBuf == null) {
            return null;
        }

        return originalBuf.getClass();
    }

    /**
     * Checks if dataset is virtual. Sub-classes must replace
     * this default implementation.
     *
     * @return true if the dataset is virtual; otherwise returns false.
     */
    public boolean isVirtual() {
        return false;
    }

    /**
     * Gets the source file name at index if dataset is virtual. Sub-classes must replace
     * this default implementation.
     *
     * @param index
     *            the index of the source file name to get.
     *
     * @return filename if the dataset is virtual; otherwise returns null.
     */
    public String getVirtualFilename(int index) {
        return null;
    }

    /**
     * Gets the number of source files if dataset is virtual. Sub-classes must replace
     * this default implementation.
     *
     * @return the list size if the dataset is virtual; otherwise returns negative.
     */
    public int getVirtualMaps() {
        return -1;
    }
}