casacore
Tables.h
Go to the documentation of this file.
1//# Tables.h: The Tables module - Casacore data storage
2//# Copyright (C) 1994-2010
3//# Associated Universities, Inc. Washington DC, USA.
4//#
5//# This library is free software; you can redistribute it and/or modify it
6//# under the terms of the GNU Library General Public License as published by
7//# the Free Software Foundation; either version 2 of the License, or (at your
8//# option) any later version.
9//#
10//# This library is distributed in the hope that it will be useful, but WITHOUT
11//# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12//# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13//# License for more details.
14//#
15//# You should have received a copy of the GNU Library General Public License
16//# along with this library; if not, write to the Free Software Foundation,
17//# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18//#
19//# Correspondence concerning AIPS++ should be addressed as follows:
20//# Internet email: aips2-request@nrao.edu.
21//# Postal address: AIPS++ Project Office
22//# National Radio Astronomy Observatory
23//# 520 Edgemont Road
24//# Charlottesville, VA 22903-2475 USA
25//#
26//# $Id$
27
28#ifndef TABLES_TABLES_H
29#define TABLES_TABLES_H
30
31//# Includes
32//# table description
33#include <casacore/casa/aips.h>
34#include <casacore/tables/Tables/TableDesc.h>
35#include <casacore/tables/Tables/ColumnDesc.h>
36#include <casacore/tables/Tables/ScaColDesc.h>
37#include <casacore/tables/Tables/ArrColDesc.h>
38#include <casacore/tables/Tables/ScaRecordColDesc.h>
39
40//# table access
41#include <casacore/tables/Tables/Table.h>
42#include <casacore/tables/Tables/TableLock.h>
43#include <casacore/tables/Tables/SetupNewTab.h>
44#include <casacore/tables/Tables/ScalarColumn.h>
45#include <casacore/tables/Tables/ArrayColumn.h>
46#include <casacore/tables/Tables/TableRow.h>
47#include <casacore/tables/Tables/TableCopy.h>
48#include <casacore/tables/Tables/TableUtil.h>
49#include <casacore/casa/Arrays/Array.h>
50#include <casacore/casa/Arrays/Slicer.h>
51#include <casacore/casa/Arrays/Slice.h>
52
53//# keywords
54#include <casacore/tables/Tables/TableRecord.h>
55#include <casacore/casa/Containers/RecordField.h>
56
57//# table lookup
58#include <casacore/tables/Tables/ColumnsIndex.h>
59#include <casacore/tables/Tables/ColumnsIndexArray.h>
60
61//# table vectors
62#include <casacore/tables/Tables/TableVector.h>
63#include <casacore/tables/Tables/TabVecMath.h>
64#include <casacore/tables/Tables/TabVecLogic.h>
65
66//# data managers
67#include <casacore/tables/DataMan.h>
68
69//# table expressions (for selection of rows)
70#include <casacore/tables/TaQL.h>
71
72
73namespace casacore { //# NAMESPACE CASACORE - BEGIN
74
75// <module>
76
77// <summary>
78// CTDS (Casacore Table Data System) is the data storage mechanism for Casacore
79// </summary>
80
81// <use visibility=export>
82
83// <reviewed reviewer="jhorstko" date="1994/08/30" tests="" demos="">
84// </reviewed>
85
86// <prerequisite>
87// <li> <linkto class="Record:description">Record</linkto> class
88// </prerequisite>
89
90// <etymology>
91// "Table" is a formal term from relational database theory:
92// <em> "The organizing principle in a relational database is the TABLE,
93// a rectangular, row/column arrangement of data values."</em>
94// Casacore tables are extensions to traditional tables, but are similar
95// enough that we use the same name. There is also a strong resemblance
96// between the uses of Casacore tables, and FITS binary tables, which
97// provides another reason to use "Tables" to describe the Casacore data
98// storage mechanism.
99// </etymology>
100
101// <synopsis>
102// Tables are the fundamental storage mechanism for Casacore. This document
103// explains <A HREF="#Tables:motivation">why</A> they had to be made,
104// <A HREF="#Tables:properties">what</A> their properties are, and
105// <A HREF="#Tables:open">how</A> to use them. The last subject is
106// discussed and illustrated in a sequence of sections:
107// <UL>
108// <LI> <A HREF="#Tables:open">opening</A> an existing table,
109// <LI> <A HREF="#Tables:read">reading</A> from a table,
110// <LI> <A HREF="#Tables:creation">creating</A> a new table,
111// <LI> <A HREF="#Tables:write">writing</A> into a table,
112// <LI> <A HREF="#Tables:row-access">accessing rows</A> in a table,
113// <LI> <A HREF="#Tables:select and sort">selection and sorting</A>
114// (see also <A HREF="../notes/199.html">Table Query Language</A>),
115// <LI> <A HREF="#Tables:concatenation">concatenating similar tables</A>,
116// <LI> <A HREF="#Tables:iterate">iterating</A> through a table,
117// <LI> <A HREF="#Tables:LockSync">locking/synchronization</A>
118// for concurrent access,
119// <LI> <A HREF="#Tables:KeyLookup">indexing</A> a table for faster lookup,
120// <LI> <A HREF="#Tables:vectors">vector operations</A> on a column,
121// <LI> <A HREF="#Tables:performance">performance and robustness</A>
122// considerations with some information on
123// <A HREF="#Tables:iotracing">IO tracing</A>.
124// </UL>
125// A few <A HREF="#Tables:applications">applications</A> exist to inspect
126// and manipulate a table.
127//
128// Several UML diagrams describe the class structure of the Tables module.
129// <ul>
130// <li> <a href="TableOverview.drawio.svg.html">Global overview of Table access</a>.
131// <li> <a href="TableDesc.drawio.svg.html">Table and column descriptions</a>.
132// <li> <a href="TableRecord.drawio.svg.html">Table keywords</a>.
133// <li> <a href="Table.drawio.svg.html">Table class structure</a>.
134// <li> <a href="PlainTable.drawio.svg.html">Detailed PlainTable class structure</a>.
135// <li> <a href="DataManager.drawio.svg.html">DataManagers for storage</a>.
136// </ul>
137
138// <ANCHOR NAME="Tables:motivation">
139// <motivation></ANCHOR>
140//
141// The Casacore tables are mainly based upon the ideas of Allen Farris,
142// as laid out in the
143// <A HREF="http://aips2.cv.nrao.edu/aips++/docs/reference/Database.ps.gz">
144// AIPS++ Database document</A>, from where the following paragraph is taken:
145//
146// <p>
147// Traditional relational database tables have two features that
148// decisively limit their applicability to scientific data. First, an item of
149// data in a column of a table must be atomic -- it must have no internal
150// structure. A consequence of this restriction is that relational
151// databases are unable to deal with arrays of data items. Second, an
152// item of data in a column of a table must not have any direct or
153// implied linkages to other items of data or data aggregates. This
154// restriction makes it difficult to model complex relationships between
155// collections of data. While these restrictions may make it easy to
156// define a mathematically complete set of data manipulation operations,
157// they are simply intolerable in a scientific data-handling context.
158// Multi-dimensional arrays are frequently the most natural modes in
159// which to discuss and think about scientific data. In addition,
160// scientific data often requires complex calibration operations that
161// must draw on large bodies of data about equipment and its performance
162// in various states. The restrictions imposed by the relational model
163// make it very difficult to deal with complex problems of this nature.
164// <p>
165//
166// In response to these limitations, and other needs, the Casacore tables were
167// designed.
168// </motivation>
169
170// <ANCHOR NAME="Tables:properties">
171// <h3>Table Properties</h3></ANCHOR>
172//
173// Casacore tables have the following properties:
174// <ul>
175// <li> A table consists of a number of rows and columns.
176// <A HREF="#Tables:keywords">Keyword/value pairs</A> may be defined
177// for the table as a whole and for individual columns. A keyword/value
178// pair for a column could, for instance, define its unit.
179// <li> Each table has a <A HREF="#Tables:Table Description">description</A>
180// which specifies the number and type of columns, and maybe initial
181// keyword sets and default values for the columns.
182// <li> A cell in a column may contain
183// <UL>
184// <LI> a scalar;
185// <LI> a "direct" array -- which must have the same shape in all
186// cells of a column, is usually small, and is stored in the
187// table itself;
188// <LI> an "indirect" array -- which may have different shapes in
189// different cells of the same column, is arbitrarily large,
190// and is stored in a separate file;
191// </UL>
192// <li> A column may be
193// <UL>
194// <LI> "filled" -- containing actual data, or
195// <LI> "virtual" -- containing a recipe telling how the data will
196// be generated dynamically
197// </UL>
198// <li> Only the standard Casacore data types can be used in filled
199// columns, be they scalars or arrays: Bool, uChar, Short, uShort,
200// Int, uInt, Int64, float, double, Complex, DComplex and String.
201// Furthermore scalars containing
202// <linkto class=TableRecord>record</linkto> values are possible
203// <li> A column can have a default value, which will automatically be stored
204// in a cell of the column, when a row is added to the table.
205// <li> <A HREF="#Tables:Data Managers">Data managers</A> handle the
206// reading, writing and generation of data. Each column in a table can
207// be assigned its own data manager, which allows for optimization of
208// the data storage per column. The choice of data manager determines
209// whether a column is filled or virtual.
210// <li> Table data are stored in a canonical format, so they can be read
211// on any machine. To avoid needless swapping of bytes, the data can
212// be stored in big endian (as used on e.g. SUN) or little endian
213// (as used on Intel PC-s) canonical format.
214// By default it uses the format specified in the aipsrc variable
215// <code>table.endianformat</code> which defaults to
216// <code>Table::LocalEndian</code> (the endian format of the
217// machine being used when creating the table).
218// <li> The SQL-like
219// <a href="../notes/199.html">Table Query Language</a> (TaQL)
220// can be used to do operations on tables like
221// select, sort, update, insert, delete, and create.
222// </ul>
223//
224// Tables can be in one of four forms:
225// <ul>
226// <li> A plain table is a table stored on disk.
227// It can be shared by multiple processes.
228// <li> A memory table is a table held in memory.
229// It is a process specific table, thus not sharable.
230// The <linkto class=Table>Table::copy</linkto> function can be used
231// to turn a memory table into a plain table.
232// <li> A reference table is a table referencing a plain or memory table.
233// It is the result of a selection or sort on another table.
234// A reference table references the data in the other table, thus
235// changing data in a reference table means that the data in the
236// original table are changed.
237// The <linkto class=Table>Table::deepCopy</linkto> function can be
238// used to turn a reference table into a plain table.
239// <li> <A HREF="#Tables:concatenation">a concatenated table</A>
240// is a union of tables (of any form) with the same description.
241// They are concatenated in a virtual way, thus no copy is made.
242// </ul>
243// Concurrent access from different processes to the same plain table is
244// fully supported by means of a <A HREF="#Tables:LockSync">
245// locking/synchronization</A> mechanism. Concurrent access over NFS is also
246// supported.
247// <p>
248// A (somewhat primitive) mechanism is available to do a
249// <A HREF="#Tables:KeyLookup">table lookup</A> based on the contents
250// of a key.
251
252// <ANCHOR NAME="Tables:open">
253// <h3>Opening an Existing Table</h3></ANCHOR>
254//
255// To open an existing table you just create a
256// <linkto class="Table:description">Table</linkto> object giving
257// the name of the table, like:
258//
259// <srcblock>
260// Table readonly_table ("tableName");
261// // or
262// Table read_and_write_table ("tableName", Table::Update);
263// </srcblock>
264//
265// The constructor option determines whether the table will be opened as
266// readonly or as read/write. A readonly table file must be opened
267// as readonly, otherwise an exception is thrown. The functions
268// <linkto class="Table">Table::isWritable(...)</linkto>
269// can be used to determine if a table is writable.
270//
271// When the table is opened, the data managers are reinstantiated
272// according to their definition at table creation.
273// <p>
274// <ANCHOR NAME="Tables:openTable"></ANCHOR>
275// The static function <src>TableUtil::openTable</src> can be used to open a table,
276// in particular a subtable, in a simple way by means of the :: notation like
277// <src>maintable::subtable</src>. The :: notation is much better than specifying
278// an explicit path (such as <src>maintable/subtable</src>), because it also works
279// fine if the main table is a reference table (e.g. the result of a selection).
280
281// <ANCHOR NAME="Tables:read">
282// <h3>Reading from a Table</h3></ANCHOR>
283//
284// You can read data from a table column with the "get" functions
285// in the classes
286// <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto>
287// and
288// <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto>.
289// For scalars of a standard data type (i.e. Bool, uChar, Int, Short,
290// uShort, uInt, float, double, Complex, DComplex and String) you could
291// instead use
292// <linkto class="TableColumn">TableColumn::getScalar(...)</linkto> or
293// <linkto class="TableColumn">TableColumn::asXXX(...)</linkto>.
294// These functions offer an extra: they do automatic data type promotion;
295// so that you can, for example, get a double value from a float column.
296//
297// These "get" functions are used in the same way as the simple "put"
298// functions described in the <A HREF="#Tables:write">writing</A> section below.
299// <p>
300// <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto>
301// can be constructed for a non-writable column. However, an exception
302// is thrown if the put function is used for it.
303// The same is true for
304// <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto> and
305// <linkto class="TableColumn:description">TableColumn</linkto>.
306// <p>
307// A typical program could look like:
308// <srcblock>
309// #include <casacore/tables/Tables/Table.h>
310// #include <casacore/tables/Tables/ScalarColumn.h>
311// #include <casacore/tables/Tables/ArrayColumn.h>
312// #include <casacore/casa/Arrays/Vector.h>
313// #include <casacore/casa/Arrays/Slicer.h>
314// #include <casacore/casa/Arrays/ArrayMath.h>
315// #include <iostream>
316//
317// main()
318// {
319// // Open the table (readonly).
320// Table tab ("some.name");
321//
322// // Construct the various column objects.
323// // Their data type has to match the data type in the table description.
324// ScalarColumn<Int> acCol (tab, "ac");
325// ArrayColumn<Float> arr2Col (tab, "arr2");
326//
327// // Loop through all rows in the table.
328// uInt nrrow = tab.nrow();
329// for (uInt i=0; i<nrrow; i++) {
330// // Read the row for both columns.
331// cout << "Column ac in row i = " << acCol(i) << endl;
332// Array<Float> array = arr2Col.get (i);
333// }
334//
335// // Show the entire column ac,
336// // and show the 10th element of arr2 in each row.
337// cout << acCol.getColumn();
338// cout << arr2Col.getColumn (Slicer(Slice(10)));
339// }
340// </srcblock>
341
342// <ANCHOR NAME="Tables:creation">
343// <h3>Creating a Table</h3></ANCHOR>
344//
345// The creation of a table is a multi-step process:
346// <ol>
347// <li>
348// Create a <A HREF="#Tables:Table Description">table description</A>.
349// <li>
350// Create a <linkto class="SetupNewTable:description">SetupNewTable</linkto>
351// object with the name of the new table.
352// <li>
353// Create the necessary <A HREF="#Tables:Data Managers">data managers</A>.
354// <li>
355// Bind each column to the appropriate data manager.
356// The system will bind unbound columns to data managers which
357// are created internally using the default data manager name
358// defined in the column description.
359// <li>
360// Define the shape of direct columns (if that was not already done in the
361// column description).
362// <li>
363// Create the <linkto class="Table:description">Table</linkto>
364// object from the SetupNewTable object. Here, a final check is performed
365// and the necessary files are created.
366// </ol>
367// The recipe above is meant for the creation of a plain table, but the
368// creation of a memory table is exactly the same. The only difference
369// is that in the call to construct the Table object the Table::Memory
370// type has to be given. Note that in the SetupNewTable object the columns
371// can be bound to any data manager. <src>MemoryTable</src> will rebind
372// stored columns to the <linkto class=MemoryStMan>MemoryStMan</linkto>
373// storage manager, but virtual column bindings are not changed.
374//
375// The following example shows how you can create a table. An example
376// specifically illustrating the creation of the
377// <A HREF="#Tables:Table Description">table description</A> is given
378// in that section. Other sections discuss the access to the table.
379//
380// <srcblock>
381// #include <casacore/tables/Tables/TableDesc.h>
382// #include <casacore/tables/Tables/SetupNewTab.h>
383// #include <casacore/tables/Tables/Table.h>
384// #include <casacore/tables/Tables/ScaColDesc.h>
385// #include <casacore/tables/Tables/ScaRecordColDesc.h>
386// #include <casacore/tables/Tables/ArrColDesc.h>
387// #include <casacore/tables/DataMan/StandardStMan.h>
388// #include <casacore/tables/DataMan/IncrementalStMan.h>
389//
390// main()
391// {
392// // Step1 -- Build the table description.
393// TableDesc td("tTableDesc", "1", TableDesc::Scratch);
394// td.comment() = "A test of class SetupNewTable";
395// td.addColumn (ScalarColumnDesc<Int> ("ab" ,"Comment for column ab"));
396// td.addColumn (ScalarColumnDesc<Int> ("ac"));
397// td.addColumn (ScalarColumnDesc<uInt> ("ad","comment for ad"));
398// td.addColumn (ScalarColumnDesc<Float> ("ae"));
399// td.addColumn (ScalarRecordColumnDesc ("arec"));
400// td.addColumn (ArrayColumnDesc<Float> ("arr1",3,ColumnDesc::Direct));
401// td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
402// td.addColumn (ArrayColumnDesc<Float> ("arr3",0,ColumnDesc::Direct));
403//
404// // Step 2 -- Setup a new table from the description.
405// SetupNewTable newtab("newtab.data", td, Table::New);
406//
407// // Step 3 -- Create storage managers for it.
408// StandardStMan stmanStand_1;
409// StandardStMan stmanStand_2;
410// IncrementalStMan stmanIncr;
411//
412// // Step 4 -- First, bind all columns to the first storage
413// // manager. Then, bind a few columns to another storage manager
414// // (which will overwrite the previous bindings).
415// newtab.bindAll (stmanStand_1);
416// newtab.bindColumn ("ab", stmanStand_2);
417// newtab.bindColumn ("ae", stmanIncr);
418// newtab.bindColumn ("arr3", stmanIncr);
419//
420// // Step 5 -- Define the shape of the direct columns.
421// // (this could have been done in the column description).
422// newtab.setShapeColumn( "arr1", IPosition(3,2,3,4));
423// newtab.setShapeColumn( "arr3", IPosition(3,3,4,5));
424//
425// // Step 6 -- Finally, create the table consisting of 10 rows.
426// Table tab(newtab, 10);
427//
428// // Now we can fill the table, which is shown in a next section.
429// // The Table destructor will flush the table to the files.
430// }
431// </srcblock>
432// To create a table in memory, only step 6 has to be modified slightly to:
433// <srcblock>
434// Table tab(newtab, Table::Memory, 10);
435// </srcblock>
436//
437// Note that the function <src>TableUtil::createTable</src> can be used to create a table
438// in a simpler way. It can also be used to create a subtable using the :: notation
439// similar to the <A HREF="#Tables:openTable"><src>TableUtil::openTable</src></A>
440// function described above.
441
442// <ANCHOR NAME="Tables:write">
443// <h3>Writing into a Table</h3></ANCHOR>
444//
445// Once a table has been created or has been opened for read/write,
446// you want to write data into it. Before doing that you may have
447// to add one or more rows to the table.
448// <note role=tip> If a table was created with a given number of rows, you
449// do not need to add rows; you may not even be able to do so.
450// </note>
451//
452// When adding new rows to the table, either via the
453// <linkto class="Table">Table(...) constructor</linkto>
454// or via the
455// <linkto class="Table">Table::addRow(...)</linkto>
456// function, you can choose to have those rows initialized with the
457// default values given in the description.
458//
459// To actually write the data into the table you need the classes
460// <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto> and
461// <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto>.
462// For each column you can construct one or
463// more of these objects. Their put(...) functions
464// let you write a value at a time or the entire column in one go.
465// For arrays you can "put" subsections of the arrays.
466//
467// As an alternative for scalars of a standard data type (i.e. Bool,
468// uChar, Int, Short, uShort, uInt, float, double, Complex, DComplex
469// and String) you could use the functions
470// <linkto class="TableColumn">TableColumn::putScalar(...)</linkto>.
471// These functions offer an extra: automatic data type promotion; so that
472// you can, for example, put a float value in a double column.
473//
474// A typical program could look like:
475// <srcblock>
476// #include <casacore/tables/Tables/TableDesc.h>
477// #include <casacore/tables/Tables/SetupNewTab.h>
478// #include <casacore/tables/Tables/Table.h>
479// #include <casacore/tables/Tables/ScaColDesc.h>
480// #include <casacore/tables/Tables/ArrColDesc.h>
481// #include <casacore/tables/Tables/ScalarColumn.h>
482// #include <casacore/tables/Tables/ArrayColumn.h>
483// #include <casacore/casa/Arrays/Vector.h>
484// #include <casacore/casa/Arrays/Slicer.h>
485// #include <casacore/casa/Arrays/ArrayMath.h>
486// #include <iostream>
487//
488// main()
489// {
490// // First build the table description.
491// TableDesc td("tTableDesc", "1", TableDesc::Scratch);
492// td.comment() = "A test of class SetupNewTable";
493// td.addColumn (ScalarColumnDesc<Int> ("ac"));
494// td.addColumn (ArrayColumnDesc<Float> ("arr2",0));
495//
496// // Setup a new table from the description,
497// // and create the (still empty) table.
498// // Note that since we do not explicitly bind columns to
499// // data managers, all columns will be bound to the default
500// // standard storage manager StandardStMan.
501// SetupNewTable newtab("newtab.data", td, Table::New);
502// Table tab(newtab);
503//
504// // Construct the various column objects.
505// // Their data type has to match the data type in the description.
506// ScalarColumn<Int> ac (tab, "ac");
507// ArrayColumn<Float> arr2 (tab, "arr2");
508// Vector<Float> vec2(100);
509//
510// // Write the data into the columns.
511// // In each cell arr2 will be a vector of length 100.
512// // Since its shape is not set explicitly, it is done implicitly.
513// for (uInt i=0; i<10; i++) {
514// tab.addRow(); // First add a row.
515// ac.put (i, i+10); // value is i+10 in row i
516// indgen (vec2, float(i+20)); // vec2 gets i+20, i+21, ..., i+119
517// arr2.put (i, vec2);
518// }
519//
520// // Finally, show the entire column ac,
521// // and show the 10th element of arr2.
522// cout << ac.getColumn();
523// cout << arr2.getColumn (Slicer(Slice(10)));
524//
525// // The Table destructor writes the table.
526// }
527// </srcblock>
528//
529// In this example we added rows in the for loop, but we could also have
530// created 10 rows straightaway by constructing the Table object as:
531// <srcblock>
532// Table tab(newtab, 10);
533// </srcblock>
534// in which case we would not include
535// <srcblock>
536// tab.addRow()
537// </srcblock>
538//
539// The classes
540// <linkto class="TableColumn:description">TableColumn</linkto>,
541// <linkto class="ScalarColumn:description">ScalarColumn&lt;T&gt;</linkto>, and
542// <linkto class="ArrayColumn:description">ArrayColumn&lt;T&gt;</linkto>
543// contain several functions to put values into a single cell or into the
544// whole column. This may look confusing, but is actually quite simple.
545// The functions can be divided in two groups:
546// <ol>
547// <li>
548// Put the given value into the column cell(s).
549// <ul>
550// <li>
551// The simplest put functions,
552// <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto> and
553// <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
554// put a value into the given column cell. For convenience, there is an
555// <linkto class="ArrayColumn">ArrayColumn::putSlice(...)</linkto>
556// to put only a part of the array.
557// <li>
558// <linkto class="ScalarColumn">ScalarColumn::fillColumn(...)</linkto> and
559// <linkto class="ArrayColumn">ArrayColumn::fillColumn(...)</linkto>
560// fill an entire column by putting the given value into all the cells
561// of the column.
562// <li>
563// The simplest putColumn functions,
564// <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto> and
565// <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>,
566// put an array of values into the column. There is a special
567// <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
568// version which puts only a part of the arrays.
569// </ul>
570//
571// <li>
572// Copy values from another column to this column.<BR>
573// These functions have the advantage that the
574// data type of the input and/or output column can be unknown.
575// The generic TableColumn objects can be used for this purpose.
576// The put(Column) function checks the data types and, if possible,
577// converts them. If the conversion is not possible, it throws an
578// exception.
579// <ul>
580// <li>
581// The put functions copy the value in a cell of the input column
582// to a cell in the output column. The row numbers of the cells
583// in the columns can be different.
584// <li>
585// The putColumn functions copy the entire contents of the input column
586// to the output column. The lengths of the columns must be equal.
587// </ul>
588// Each class has its own set of these functions.
589// <ul>
590// <li>
591// <linkto class="TableColumn">TableColumn::put(...)</linkto> and
592// <linkto class="TableColumn">TableColumn::putColumn(...)</linkto>
593// are the most generic. They can be
594// used if the data types of both input and output column are unknown.
595// Note that these functions are virtual.
596// <li>
597// <linkto class="ScalarColumn">ScalarColumn::put(...)</linkto>,
598// <linkto class="ArrayColumn">ArrayColumn::put(...)</linkto>,
599// <linkto class="ScalarColumn">ScalarColumn::putColumn(...)</linkto>, and
600// <linkto class="ArrayColumn">ArrayColumn::putColumn(...)</linkto>
601// are less generic and therefore potentially more efficient.
602// The most efficient variants are the ones taking a
603// Scalar/ArrayColumn&lt;T&gt;, because they require no data type
604// conversion.
605// </ul>
606// </ol>
607
608// <ANCHOR NAME="Tables:row-access">
609// <h3>Accessing rows in a Table</h3></ANCHOR>
610//
611// Apart from accessing a table column-wise as described in the
612// previous two sections, it is also possible to access a table row-wise.
613// The <linkto class=TableRow>TableRow</linkto> class makes it possible
614// to access multiple fields in a table row as a whole. Note that like the
615// XXColumn classes described above, there is also an ROTableRow class
616// for access to readonly tables.
617// <p>
618// On construction of a TableRow object it has to be specified which
619// fields (i.e. columns) are part of the row. For these fields a
620// fixed structured <linkto class=TableRecord>TableRecord</linkto>
621// object is constructed as part of the TableRow object. The TableRow::get
622// function will fill this record with the table data for the given row.
623// The user has access to the record and can use
624// <linkto class=RecordFieldPtr>RecordFieldPtr</linkto> objects for
625// speedier access to the record.
626// <p>
627// The class could be used as shown in the following example.
628// <srcblock>
629// // Open the table as readonly and define a row object to contain
630// // the given columns.
631// // Note that the function stringToVector is a very convenient
632// // way to construct a Vector<String>.
633// // Show the description of the fields in the row.
634// Table table("Some.table");
635// ROTableRow row (table, stringToVector("col1,col2,col3"));
636// cout << row.record().description();
637// // Since the structure of the record is known, the RecordFieldPtr
638// // objects could be used to allow for easy and fast access to
639// // the record which is refilled for each get.
640// RORecordFieldPtr<String> col1(row.record(), "col1");
641// RORecordFieldPtr<Double> col2(row.record(), "col2");
642// RORecordFieldPtr<Array<Int> > col3(row.record(), "col3");
643// for (uInt i=0; i<table.nrow(); i++) {
644// row.get (i);
645// someString = *col1;
646// somedouble = *col2;
647// someArrayInt = *col3;
648// }
649// </srcblock>
650// The description of TableRow contains some more extensive examples.
651
652// <ANCHOR NAME="Tables:select and sort">
653// <h3>Table Selection and Sorting</h3></ANCHOR>
654//
655// The result of a select and sort of a table is another table,
656// which references the original table. This means that an update
657// of a sorted or selected table results in the update of the original
658// table. The result is, however, a table in itself, so all table
659// functions (including select and sort) can be used with it.
660// Note that a true copy of such a reference table can be made with
661// the <linkto class=Table>Table::deepCopy</linkto> function.
662// <p>
663// Rows or columns can be selected from a table. Columns can be selected
664// by the
665// <linkto class="Table">Table::project(...)</linkto>
666// function, while rows can be selected by the various
667// <linkto class="Table">Table operator()</linkto> functions.
668// Usually a row is selected by giving a select expression with
669// <linkto class="TableExprNode:description">TableExprNode</linkto>
670// objects. These objects represent the various nodes
671// in an expression, e.g. a constant, a column, or a subexpression.
672// The Table function
673// <linkto class="Table">Table::col(...)</linkto>
674// creates a TableExprNode object for a column. The function
675// <linkto class="Table">Table::key(...)</linkto>
676// does the same for a keyword by reading
677// the keyword value and storing it as a constant in an expression node.
678// All column nodes in an expression must belong to the same table,
679// otherwise an exception is thrown.
680// In the following example we select all rows with RA>10:
681// <srcblock>
682// #include <casacore/tables/TaQL/ExprNode.h>
683// Table table ("Table.name");
684// Table result = table (table.col("RA") > 10);
685// </srcblock>
686// while in the next one we select rows with RA and DEC in the given
687// intervals:
688// <srcblock>
689// Table result = table (table.col("RA") > 10
690// && table.col("RA") < 14
691// && table.col("DEC") >= -10
692// && table.col("DEC") <= 10);
693// </srcblock>
694// The following operators can be used to form arbitrarily
695// complex expressions:
696// <ul>
697// <li> Relational operators ==, !=, >, >=, < and <=.
698// <li> Logical operators &&, || and !.
699// <li> Arithmetic operators +, -, *, /, %, and unary + and -.
700// <li> Bit operators ^, &, |, and unary ~.
701// <li> Operator() to take a subsection of an array.
702// </ul>
703// Many functions (like sin, max, conj) can be used in an expression.
704// Class <linkto class=TableExprNode>TableExprNode</linkto> shows
705// the available functions.
706// E.g.
707// <srcblock>
708// Table result = table (sin (table.col("RA")) > 0.5);
709// </srcblock>
710// Function <src>in</src> can be used to select from a set of values.
711// A value set can be constructed using class
712// <linkto class=TableExprNodeSet>TableExprNodeSet</linkto>.
713// <srcblock>
714// TableExprNodeSet set;
715// set.add (TableExprNodeSetElem ("abc"));
716// set.add (TableExprNodeSetElem ("defg"));
717// set.add (TableExprNodeSetElem ("h"));
718// Table result = table (table.col("NAME").in (set));
719// </srcblock>
720// select rows with a NAME equal to <src>abc</src>,
721// <src>defg</src>, or <src>h</src>.
722//
723// <p>
724// You can sort a table on one or more columns containing scalars.
725// In this example we simply sort on column RA (default is ascending):
726// <srcblock>
727// Table table ("Table.name");
728// Table result = table.sort ("RA");
729// </srcblock>
730// Multiple
731// <linkto class="Table">Table::sort(...)</linkto>
732// functions exist which allow for more flexible control over the sort order.
733// In the next example we sort first on RA in descending order
734// and then on DEC in ascending order:
735// <srcblock>
736// Table table ("Table.name");
737// Block<String> sortKeys(2);
738// Block<int> sortOrders(2);
739// sortKeys(0) = "RA";
740// sortOrders(0) = Sort::Descending;
741// sortKeys(1) = "DEC";
742// sortOrders(1) = Sort::Ascending;
743// Table result = table.sort (sortKeys, sortOrders);
744// </srcblock>
745//
746// Tables stemming from the same root can be combined in several
747// ways with the help of the various logical
748// <linkto class="Table">Table operators</linkto> (operator|, etc.).
749
750// <h4>Table Query Language</h4>
751// The selection and sorting mechanism described above can only be used
752// in a hard-coded way in a C++ program.
753// There is, however, another way. Strings containing selection and
754// sorting commands can be used.
755// The syntax of these commands is based on SQL and is described in the
756// <a href="../notes/199.html">Table Query Language</a> (TaQL) note 199.
757// The language supports UDFs (User Defined Functions) in dynamically
758// loadable libraries as explained in the note.
759// <br>A TaQL command can be executed with the static function
760// <src>tableCommand</src> defined in class
761// <linkto class=TableParse>TableParse</linkto>.
762
763// <ANCHOR NAME="Tables:concatenation">
764// <h3>Table Concatenation</h3></ANCHOR>
765// Tables with identical descriptions can be concatenated in a virtual way
766// using the Table concatenation constructor. Such a Table object behaves
767// as any other Table object, thus any operation can be performed on it.
768// An identical description means that the number of columns, the column names,
769// and the data types of the columns must be the same. The columns do not
770// need to be ordered in the same way nor to be stored in the same way.
771// <br>Note that if tables have different column names, it is possible
772// to form a projection (as described in the previous section) first
773// to make them appear identical.
774//
775// Sometimes a MeasurementSet is partitioned, for instance in chunks of
776// one hour. All those chunks can be virtually concatenated this way.
777// Note that all tables in the concatenation will be opened, thus one might
778// run out of file descriptors if there are many chunks.
779//
780// Similar to reference tables, it is possible to make a concatenated Table
781// persistent by using the <src>rename</src> function. It will not copy the
782// data; only the names of the tables used are written.
783//
784// The keywords of a concatenated table are taken from the first table.
785// It is possible to change or add keywords, but that is not persistent,
786// not even if the concatenated table is made persistent.
787// <br>The keywords holding subtables can be handled in a special way.
788// Normally the subtables of the first table are used as the subtables of
789// the concatenation, but it is possible to concatenate subtables as well by
790// giving their names in the constructor.
791// In this way the, say, SYSCAL subtable of a MeasurementSet can be
792// concatenated as well.
793// <srcblock>
794// // Create virtual concatenation of ms0 and ms1.
795// Block<String> names(2);
796// names[0] = "ms0";
797// names[1] = "ms1";
798// // Also concatenate their SYSCAL subtables.
799// Block<String> subNames(1, "SYSCAL");
800// Table concTab (names, subNames);
801// </srcblock>
802
803// <ANCHOR NAME="Tables:iterate">
804// <h3>Table Iterators</h3></ANCHOR>
805//
806// You can iterate through a table in an arbitrary order by getting
807// a subset of the table consisting of the rows in which the iteration
808// columns have the same value.
809// An iterator object is created by constructing a
810// <linkto class="TableIterator:description">TableIterator</linkto>
811// object with the appropriate column names.
812//
813// In the next example we define an iteration on the columns Time and
814// Baseline. Each iteration step returns a table subset in which Time and
815// Baseline have the same value.
816//
817// <srcblock>
818// // Iterate over Time and Baseline (by default in ascending order).
819// // Time is the main iteration order, thus the first column specified.
820// Table t;
821// Table tab ("UV_Table.data");
822// Block<String> iv0(2);
823// iv0[0] = "Time";
824// iv0[1] = "Baseline";
825// //
826// // Create the iterator. This will prepare the first subtable.
827// TableIterator iter(tab, iv0);
828// Int nr = 0;
829// while (!iter.pastEnd()) {
830// // Get the first subtable.
831// // This will contain rows with equal Time and Baseline.
832// t = iter.table();
833// cout << t.nrow() << " ";
834// nr++;
835// // Prepare the next subtable with the next Time,Baseline value.
836// iter.next();
837// }
838// cout << endl << nr << " iteration steps" << endl;
839// </srcblock>
840//
841// You can define more than one iterator on the same table; they operate
842// independently.
843//
844// Note that the result of each iteration step is a table in itself which
845// references the original table, just as in the case of a sort or select.
846// This means that the resulting table can be used again in a sort, select,
847// iteration, etc.
848
849// <ANCHOR NAME="Tables:vectors">
850// <h3>Table Vectors</h3></ANCHOR>
851//
852// A table vector makes it possible to treat a column in a table
853// as a vector. Almost all operators and functions defined for normal
854// vectors are also defined for table vectors. So it is, for instance,
855// possible to add a constant to a table vector. This has the effect
856// that the underlying column gets changed.
857//
858// You can use the templated class
859// <linkto class="TableVector:description">TableVector</linkto>
860// to make a scalar column appear as a (table) vector.
861// Columns containing arrays or tables are not supported.
862// The data type of the TableVector object must match the
863// data type of the column.
864// A table vector can also hold a normal vector so that (temporary)
865// results of table vector operations can be handled.
866//
867// In the following example we double the data in column COL1 and
868// store the result in a temporary table vector.
869// <srcblock>
870// // Create a table vector for column COL1.
871// // Note that if the table is readonly, putting data in the table vector
872// // results in an exception.
873// Table tab ("Table.data");
874// TableVector<Int> tabvec(tab, "COL1");
875// // Multiply it by a constant. Result is kept in a Vector in memory.
876// TableVector<Int> temp = 2 * tabvec;
877// </srcblock>
878//
879// In the next example we double the data in COL1 and put the result back
880// in the column.
881// <srcblock>
882// // Create a table vector for column COL1.
883// // It has to be a TableVector to be able to change the column.
884// Table tab ("Table.data", Table::Update);
885// TableVector<Int> tabvec(tab, "COL1");
886// // Multiply it by a constant.
887// tabvec *= 2;
888// </srcblock>
889
890// <ANCHOR NAME="Tables:keywords">
891// <h3>Table Keywords</h3></ANCHOR>
892//
893// Any number of keyword/value pairs may be attached to the table as a whole,
894// or to any individual column. They may be freely added, retrieved,
895// re-assigned, or deleted. They are, in essence, a self-resizing list of
896// values (any of the primitive types) indexed by Strings (the keyword).
897//
898// A table keyword/value pair might be
899// <srcblock>
900// Observer = Grote Reber
901// Date = 10 october 1942
902// </srcblock>
903// Column keyword/value pairs might be
904// <srcblock>
905// Units = mJy
906// Reference Pixel = 320
907// </srcblock>
908// The class
909// <linkto class="TableRecord:description">TableRecord</linkto>
910// represents the keywords in a table.
911// It is (indirectly) derived from the standard record classes in the class
912// <linkto class="Record:description">Record</linkto>.
913
914// <ANCHOR NAME="Tables:Table Description">
915// <h3>Table Description</h3></ANCHOR>
916//
917// A table contains a description of itself, which defines the layout of the
918// columns and the keyword sets for the table and for the individual columns.
919// It may also define initial keyword sets and default values for the columns.
920// Such a default value is automatically stored in a cell in the table column,
921// whenever a row is added to the table.
922//
923// The creation of the table descriptor is the first step in the creation of
924// a new table. The description is part of the table itself, but may also
925// exist in a separate file. This is useful if you need to create a number
926// of tables with the same structure; in other circumstances it probably
927// should be avoided.
928//
929// The public classes to set up a table description are:
930// <ul>
931// <li> <linkto class="TableDesc:description">TableDesc</linkto>
932// -- holds the table description.
933// <li> <linkto class="ColumnDesc:description">ColumnDesc</linkto>
934// -- holds a generic column description.
935// <li> <linkto class="ScalarColumnDesc:description">ScalarColumnDesc&lt;T&gt;
936// </linkto>
937// -- defines a column containing a scalar value.
938// <li> <linkto class="ScalarRecordColumnDesc:description">ScalarRecordColumnDesc
939// </linkto>
940// -- defines a column containing a scalar record value.
941// <li> <linkto class="ArrayColumnDesc:description">ArrayColumnDesc&lt;T&gt;
942// </linkto>
943// -- defines a column containing an (in)direct array.
944// </ul>
945//
946// Here follows a typical example of the construction of a table
947// description. For more specialized things -- like the definition of a
948// default data manager -- we refer to the descriptions of the above
949// mentioned classes.
950//
951// <srcblock>
952// #include <casacore/tables/Tables/TableDesc.h>
953// #include <casacore/tables/Tables/ScaColDesc.h>
954// #include <casacore/tables/Tables/ArrColDesc.h>
955// #include <casacore/tables/Tables/ScaRecordColDesc.h>
956// #include <casacore/tables/Tables/TableRecord.h>
957// #include <casacore/casa/Arrays/IPosition.h>
958// #include <casacore/casa/Arrays/Vector.h>
959//
960// main()
961// {
962// // Create a new table description
963// // Define a comment for the table description.
964// // Define some keywords.
965// ColumnDesc colDesc1, colDesc2;
966// TableDesc td("tTableDesc", "1", TableDesc::New);
967// td.comment() = "A test of class TableDesc";
968// td.rwKeywordSet().define ("ra", float(3.14));
969// td.rwKeywordSet().define ("equinox", double(1950));
970// td.rwKeywordSet().define ("aa", Int(1));
971//
972// // Define an integer column ab.
973// td.addColumn (ScalarColumnDesc<Int> ("ab", "Comment for column ab"));
974//
975// // Add a scalar integer column ac, define keywords for it
976// // and define a default value 0.
977// // Overwrite the value of keyword unit.
978// ScalarColumnDesc<Int> acColumn("ac");
979// acColumn.rwKeywordSet().define ("scale", Complex(0,0));
980// acColumn.rwKeywordSet().define ("unit", "");
981// acColumn.setDefault (0);
982// td.addColumn (acColumn);
983// td.rwColumnDesc("ac").rwKeywordSet().define ("unit", "DEG");
984//
985// // Add a scalar string column ad and define its comment string.
986// td.addColumn (ScalarColumnDesc<String> ("ad","comment for ad"));
987//
988// // Now define array columns.
989// // This one is indirect and has no dimensionality mentioned yet.
990// td.addColumn (ArrayColumnDesc<Complex> ("Arr1","comment for Arr1"));
991// // This one is indirect and has 3-dim arrays.
992// td.addColumn (ArrayColumnDesc<Int> ("A2r1","comment for Arr1",3));
993// // This one is direct and has 2-dim arrays with axes length 4 and 7.
994// td.addColumn (ArrayColumnDesc<uInt> ("Arr3","comment for Arr1",
995// IPosition(2,4,7),
996// ColumnDesc::Direct));
997//
998// // Add columns containing records.
999// td.addColumn (ScalarRecordColumnDesc ("Rec1"));
1000// }
1001// </srcblock>
1002
1003// <ANCHOR NAME="Tables:Data Managers">
1004// <h3>Data Managers</h3></ANCHOR>
1005//
1006// Data managers take care of the actual access to the data in a column.
1007// There are two kinds of data managers:
1008// <ol>
1009// <li> <A HREF="#Tables:storage managers">Storage managers</A> --
1010// which store the data as such. They can only handle the standard
1011// data types (Bool,...,String) as discussed in the section about the
1012// <A HREF="#Tables:properties">table properties</A>.
1013// <li> <A HREF="#Tables:virtual column engines">Virtual column engines</A>
1014// -- which manipulate the data.
1015// An engine could be a simple thing like scaling the data (as done
1016// in classic AIPS to reduce data storage), but it could also be an
1017// elaborate thing like applying corrections on-the-fly.
1018// <br>A special engine is VirtualTaQLColumn which can be used to define
1019// the contents of a column by means of a TaQL expression. In particular,
1020// it can be used to define a constant value for the entire column.
1021// But it can also be used to calculate the UVW-coordinates on-the-fly.
1022// <br>An engine must be used when storing data objects with a non-standard type.
1023// It has to break down the object into items with standard data types
1024// which can be stored with a storage manager.
1025// </ol>
1026// In general the user of a table does not need to be aware which
1027// data managers are being used underneath. Only when the table is created
1028// data managers have to be bound to the columns. Thereafter it is
1029// completely transparent.
1030//
1031// Data managers need to be registered, so they can be found when a table is
1032// opened. All data managers mentioned below are part of the system and
1033// pre-registered.
1034// It is, however, also possible to load data managers on demand. If a data
1035// manager is not registered, an attempt is made to load a shared library named
1036// after the part of the data manager name (in lowercase) before a dot or left arrow.
1037// The dot makes it possible to have multiple data managers in a shared library,
1038// while the left arrow is meant for templated data manager classes.
1039// <br>E.g. if <src>BitFlagsEngine&lt;uChar&gt;</src> was not registered, the shared
1040// library <src>libbitflagsengine.so</src> (or .dylib) will be loaded. If
1041// successful, its function <src>register_bitflagsengine()</src> will be
1042// executed which should register the data manager(s). Thereafter it is known
1043// and will be used. For example in a file Register.h and Register.cc:
1044// <srcblock>
1045// // Declare in .h file as C function, so no name mangling is done.
1046// extern "C" {
1047// void register_bitflagsengine();
1048// }
1049// // Implement in .cc file.
1050// void register_bitflagsengine()
1051// {
1052// BitFlagsEngine<uChar>::registerClass();
1053// BitFlagsEngine<Short>::registerClass();
1054// BitFlagsEngine<Int>::registerClass();
1055// }
1056// </srcblock>
1057// There are several functions that can give information which data managers
1058// are used for which columns and to obtain the characteristics and properties
1059// of them. Class RODataManAccessor and derived classes can be used for it
1060// as well as the functions <src>dataManagerInfo</src> and
1061// <src>showStructure</src> in class Table.
1062
1063// <ANCHOR NAME="Tables:storage managers">
1064// <h3>Storage Managers</h3></ANCHOR>
1065//
1066// Storage managers are used to store the data contained in the column cells.
1067// At table construction time the binding of columns to storage managers is done.
1068// <br>Each storage manager uses one or more files (usually called table.fi_xxx
1069// where i is a sequence number and _xxx is some kind of extension).
1070// Typically several files are used to store the data of the columns of a table.
1071// <br>In order to reduce the number of files (and to support large block sizes),
1072// it is possible to have a single container file (a MultiFile) containing all
1073// data files used by the storage managers. Such a file is called table.mf.
1074// Note that the program <em>lsmf</em> can be used to see which
1075// files are contained in a MultiFile. The program <em>tomf</em> can
1076// convert the files in a MultiFile to regular files.
1077// <br>At table creation time it is decided if a MultiFile will be used. It
1078// can be done by means of the StorageOption object given to the SetupNewTable
1079// constructor and/or by the aipsrc variables:
1080// <ul>
1081// <li> <src>table.storage.option</src> which can have the value
1082// 'multifile', 'sepfile' (meaning separate files), or 'default'.
1083// Currently the default is to use separate files.
1084// <li> <src>table.storage.blocksize</src> defines the block size to be
1085// used by a MultiFile. If 0 is given, the file system's block size
1086// will be used.
1087// </ul>
1088// Almost all standard storage managers support the MultiFile.
1089// The exception is StManAipsIO, because it is hardly ever used.
1090//
1091// Several storage managers exist, each with its own storage characteristics.
1092// The default and preferred storage manager is <src>StandardStMan</src>.
1093// Other storage managers should only be used if they pay off in
1094// file space (like <src>IncrementalStMan</src> for slowly varying data)
1095// or access speed (like the tiled storage managers for large data arrays).
1096// <br>The storage managers store the data in a big or little endian
1097// canonical format. The format can be specified when the table is created.
1098// By default it uses the endian format as specified in the aipsrc variable
1099// <code>table.endianformat</code> which can have the value local, big,
1100// or little. The default is local.
1101// <ol>
1102// <li>
1103// <linkto class="StandardStMan:description">StandardStMan</linkto>
1104// stores all the values in so-called buckets (equally sized chunks
1105// in the file). It requires little memory.
1106// <br>It replaces the old <src>StManAipsIO</src>.
1107//
1108// <li>
1109// <linkto class="IncrementalStMan:description">IncrementalStMan</linkto>
1110// uses a storage mechanism resembling "incremental backups". A value
1111// is only stored if it is different from the previous row. It is
1112// very well suited for slowly varying data.
1113// <br>The class <linkto class="ROIncrementalStManAccessor:description">
1114// ROIncrementalStManAccessor</linkto> can be used to tune the
1115// behaviour of the <src>IncrementalStMan</src>. It contains functions
1116// to deal with the cache size and to show the behaviour of the cache.
1117//
1118// <li>
1119// The <a href="#Tables:TiledStMan">Tiled Storage Managers</a>
1120// store the data as a tiled hypercube allowing for more or less equally
1121// efficient data access along all main axes. It can be used for
1122// UV-data as well as for image data.
1123//
1124// <li>
1125// <linkto class="StManAipsIO:description">StManAipsIO</linkto>
1126// uses <src>AipsIO</src> to store the data in the columns.
1127// It supports all table functionality, but its I/O is probably not
1128// as efficient as other storage managers. It also requires that
1129// a large part of the table fits in memory.
1130// <br>It should not be used anymore, because it uses a lot of memory
1131// for larger tables and because it is not very robust in case an
1132// application or system crashes.
1133//
1134// <li>
1135// <linkto class="MemoryStMan:description">MemoryStMan</linkto>
1136// holds the data in memory. It means that data 'stored' with this
1137// storage manager are NOT persistent.
1138// <br>This storage manager is primarily meant for tables held in
1139// memory, but it can also be useful for temporary columns in
1140// normal tables. Note, however, that if a table is accessed
1141// concurrently from multiple processes, MemoryStMan data cannot be
1142// synchronized.
1143//
1144// <li>
1145// @ref dyscostman.DyscoStMan is a class that stores data with lossy
1146// compression. It combines non-linear least-squares quantization and
1147// different kinds of normalization. With the typical factor of 4
1148// compression, the loss in accuracy from lossy compression is
1149// negligible. It should only be used for real (non-simulated) data
1150// that is in a Measurement Set.
1151// The method is described in this article:
1152// https://arxiv.org/abs/1609.02019.
1153//
1154// <li>
1155// <linkto class="Adios2StMan:description">Adios2StMan</linkto> uses the
1156// <A HREF="https://github.com/ornladios/ADIOS2">ADIOS2 framework</A> to
1157// store and load column data.
1158// <br>ADIOS2 has several configurable storage backends itself, and this
1159// flexibility is also available via Adios2StMan. This includes, among other
1160// things, storing compressed data, or choosing a different on-disk format.
1161// <br>This storage manager is also special in that it provides parallel
1162// writing capabilities for MPI processes, so that multiple processes can
1163// write into different sections of the same column concurrently.
1164// </ol>
1165//
1166// The storage manager framework makes it possible to support arbitrary files
1167// as tables. This has been used in a case where a file is filled
1168// by the data acquisition system of a telescope. The file is simultaneously
1169// used as a table using a dedicated storage manager. The table
1170// system and storage manager provide a sync function to synchronize
1171// the processes, i.e. to make CTDS aware of changes
1172// in the file size (thus in the table size) by the filling process.
1173//
1174// <note role=tip>
1175// Not all data managers support all the table functionality. So, the choice
1176// of a data manager can greatly influence the type of operations you can do
1177// on the table as a whole.
1178// For example, if a column uses the tiled storage manager,
1179// it is not possible to delete rows from the table, because that storage
1180// manager will not support deletion of rows.
1181// However, it is always possible to delete all columns of a data
1182// manager in one single call.
1183// </note>
1184
1185// <ANCHOR NAME="Tables:TiledStMan">
1186// <h3>Tiled Storage Manager</h3></ANCHOR>
1187// The Tiled Storage Managers allow one to store the data of
1188// one or more columns in a tiled way. Tiling means
1189// that the data are stored without a preferred order to make access
1190// along the different main axes equally efficient. This is done by
1191// storing the data in so-called tiles (i.e. equally shaped subsets of an
1192// array) to increase data locality. The user can define the tile shape
1193// to optimize for the most frequently used access.
1194// <p>
1195// The Tiled Storage Manager has the following properties:
1196// <ul>
1197// <li> There can be more than one Tiled Storage Manager in
1198// a table; each with its own (unique) name.
1199// <li> Each Tiled Storage Manager can store an
1200// N-dimensional so-called hypercolumn.
1201// Elaborate hypercolumns can be defined using
1202// <linkto file="TableDesc.h#defineHypercolumn">
1203// TableDesc::defineHypercolumn</linkto>.
1204// <br>Note that defining a hypercolumn is only necessary if it
1205// contains multiple columns or if the TiledDataStMan is used.
1206// It means that in practice it is hardly ever needed to define a
1207// hypercolumn.
1208// <br>A hypercolumn consists of up to three types of columns:
1209// <dl>
1210// <dt> Data columns
1211// <dd> contain the data to be stored in a tiled way. This will
1212// be done in tiled hypercubes.
1213// There must be at least one data column.
1214// <br> For example: a table contains UV-data with
1215// data columns "Visibility" and "Weight".
1216// <dt> Coordinate columns
1217// <dd> define the world coordinates of the pixels in the data columns.
1218// Coordinate columns are optional, but if given there must
1219// be N coordinate columns for an N-dimensional hypercolumn.
1220// <br>
1221// For example: the data in the example above is 4-dimensional
1222// and has coordinate columns "Time", "Baseline", "Frequency",
1223// and "Polarization".
1224// <dt> Id columns
1225// <dd> are needed if TiledDataStMan is used.
1226// Different rows in the data columns can be stored in different
1227// hypercubes. The values in the id column(s) uniquely identify
1228// the hypercube a row is stored in.
1229// <br>
1230// For example: the line and continuum data in a MeasurementSet
1231// table need to be stored in 2 different hypercubes (because
1232// their shapes are different (see below)). A column containing
1233// the type (line or continuum) has to be used as an id column.
1234// </dl>
1235// <li> If multiple data columns are used, the shape of their data
1236// must be conforming in each individual row.
1237// If data in different rows have different shapes, they must be
1238// stored in different hypercubes, because a hypercube can only hold
1239// data with conforming shapes.
1240// <br>
1241// Thus in the example above, rows with line data will have conforming
1242// shapes and can be stored in one hypercube. The continuum data
1243// will have another shape and can be stored in another hypercube.
1244// <br>
1245// The storage manager keeps track of the mapping of rows to/from
1246// hypercubes.
1247// <li> Each hypercube can be tiled in its own way. It is not required
1248// that an integer number of tiles fits in the hypercube. The last
1249// tiles will be padded as needed.
1250// <li> The last axis of a hypercube can be extensible. This means that
1251// the size of that axis does not need to be defined when the
1252// hypercube is defined in the storage manager. Instead, the hypercube
1253// can be extended when another chunk of data has to be stored.
1254// This can be very useful in, for example, a (quasi-)realtime
1255// environment where the size of the time axis is not known.
1256// <li> If coordinate columns are defined, they describe the coordinates
1257// of the axes of the hypercubes. Each hypercube has its own set of
1258// coordinates.
1259// <li> Data and id columns have to be stored with the Tiled
1260// Storage Manager. However, coordinate columns do not need to be
1261// stored with the Tiled Storage Manager.
1262// Especially in the case where the coordinates for a hypercube axis
1263// are varying (i.e. dependent on other axes), another storage manager
1264// has to be used (because the Tiled Storage Manager can only
1265// hold constant coordinates).
1266// </ul>
1267// <p>
1268// The following Tiled Storage Managers are available:
1269// <dl>
1270// <dt> <linkto class=TiledShapeStMan:description>TiledShapeStMan</linkto>
1271// <dd> can be seen as a specialization of <src>TiledDataStMan</src>
1272// by using the array shape as the id value.
1273// Similarly to <src>TiledDataStMan</src> it can maintain multiple
1274// hypercubes and store multiple rows in a hypercube, but it is
1275// easier to use, because the special <src>addHypercube</src> and
1276// <src>extendHypercube</src> functions are not needed.
1277// A hypercube is automatically added when a new array shape is
1278// encountered.
1279// <br>
1280// This storage manager could be used for a table with a column
1281// containing line and continuum data, which will result
1282// in 2 hypercubes.
1283// <dt> <linkto class=TiledCellStMan:description>TiledCellStMan</linkto>
1284// <dd> creates (automatically) a new hypercube for each row.
1285// Thus each row of the hypercolumn is stored in a separate hypercube.
1286// Note that the row number serves as the id value. So an id column
1287// is not needed, although there are multiple hypercubes.
1288// <br>
1289// This storage manager is meant for tables where the data arrays
1290// in the different rows are not accessed together. One can think
1291// of a column containing images. Each row contains an image and
1292// only one image is shown at a time.
1293// <dt> <linkto class=TiledColumnStMan:description>TiledColumnStMan</linkto>
1294// <dd> creates one hypercube for the entire hypercolumn. Thus all cells
1295// in the hypercube have to have the same shape and therefore this
1296// storage manager is only possible if all columns in the hypercolumn
1297// have the attribute FixedShape.
1298// <br>
1299// This storage manager could be used for a table with a column
1300// containing images for the Stokes parameters I, Q, U, and V.
1301// By storing them in one hypercube, it is possible to retrieve
1302// the 4 Stokes values for a subset of the image or for an individual
1303// pixel in a very efficient way.
1304// <dt> <linkto class=TiledDataStMan:description>TiledDataStMan</linkto>
1305// <dd> allows one to control the creation and extension of hypercubes.
1306// This is done by means of the class
1307// <linkto class=TiledDataStManAccessor:description>
1308// TiledDataStManAccessor</linkto>.
1309// It makes it possible to store, say, row 0-9 in hypercube A,
1310// row 10-34 in hypercube B, row 35-54 in hypercube A again, etc.
1311// <br>
1312// The drawback of this storage manager is that its hypercubes are not
1313// automatically extended when adding new rows. The special functions
1314// <src>addHypercube</src> and <src>extendHypercube</src> have to be
1315// used making it somewhat tedious to use.
1316// Therefore this storage manager may become obsolete in the near future.
1317// </dl>
1318// The Tiled Storage Managers have 3 ways to access and cache the data.
1319// Class <linkto class=TSMOption>TSMOption</linkto> can be used to setup an
1320// access choice and use it in a Table constructor.
1321// <ul>
1322// <li> The old way (the only way until January 2010) uses a cache
1323// of its own to keep tiles that might need to be reused. It will always
1324// access entire tiles, even if only a small part is needed.
1325// It is possible to define a maximum cache size. The description of class
1326// <linkto class=ROTiledStManAccessor>ROTiledStManAccessor</linkto>
1327// contains a discussion about the effect of defining a maximum cache
1328// size.
1329// <li> Memory-mapping the data files. In this way the operating system
1330// takes care of the IO and caching. However, the limited address space
1331// may preclude using it for large tables on 32-bit systems.
1332// <li> Use buffered IO and let the kernel's file cache take care of caching.
1333// It will access the data in chunks of the given buffer size, so the
1334// entire tile does not need to be accessed if only a small part is
1335// needed.
1336// </ul>
1337// Apart from reading, all access ways described above can also handle writing
1338// and extending tables. They create fully equal files. Both little and big
1339// endian data can be read or written.
1340
1341// <ANCHOR NAME="Tables:virtual column engines">
1342// <h3>Virtual Column Engines</h3></ANCHOR>
1343//
1344// Virtual column engines are used to implement the virtual (i.e.
1345// calculated-on-the-fly) columns. CTDS provides
1346// an abstract base class (or "interface class")
1347// <linkto class="VirtualColumnEngine:description">VirtualColumnEngine</linkto>
1348// that specifies the protocol for these engines.
1349// The programmer must derive a concrete class to implement
1350// the application-specific virtual column.
1351// <p>
1352// For example: the programmer
1353// needs a column in a table which is the difference between two other
1354// columns. (Perhaps these two other columns are updated periodically
1355// during the execution of a program.) A good way to handle this would
1356// be to have a virtual column in the table, and write a virtual column
1357// engine which knows how to calculate the difference between corresponding
1358// cells of the two other columns. So the result is that accessing a
1359// particular cell of the virtual column invokes the virtual column engine,
1360// which then gets the values from the other two columns, and returns their
1361// difference. This particular example could be done using
1362// <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>.
1363// <p>
1364// Several virtual column engines exist:
1365// <ol>
1366// <li> The class
1367// <linkto class="VirtualTaQLColumn:description">VirtualTaQLColumn</linkto>
1368// makes it possible to define a column as an arbitrary expression of
1369// other columns. It uses the <a href="../notes/199.html">TaQL</a>
1370// CALC command. The virtual column can be a scalar or an array and
1371// can have one of the standard data types supported by CTDS.
1372// <li> The class
1373// <linkto class="BitFlagsEngine:description">BitFlagsEngine</linkto>
1374// maps an integer bit flags column to a Bool column. A read and write mask
1375// can be defined telling which bits to take into account when mapping
1376// to and from Bool (thus when reading or writing the Bool).
1377// <li> The class
1378// <linkto class="CompressFloat:description">CompressFloat</linkto>
1379// compresses a single precision floating point array by scaling the
1380// values to shorts (16-bit integer).
1381// <li> The class
1382// <linkto class="CompressComplex:description">CompressComplex</linkto>
1383// compresses a single precision complex array by scaling the
1384// values to shorts (16-bit integer). In fact, the 2 parts of the complex
1385// number are combined into a 32-bit integer.
1386// <li> The class
1387// <linkto class="CompressComplexSD:description">CompressComplexSD</linkto>
1388// does the same as CompressComplex, but optimizes for the case where the
1389// imaginary part is zero (which is often the case for Single Dish data).
1390// <li> The double templated class
1391// <linkto class="ScaledArrayEngine:description">ScaledArrayEngine</linkto>
1392// scales the data in an array from, for example,
1393// float to short before putting it.
1394// <li> The double templated class
1395// <linkto class="MappedArrayEngine:description">MappedArrayEngine</linkto>
1396// converts the data from one data type to another. Sometimes it might be
1397// needed to store the residual data in an MS in double precision.
1398// Because the imaging task can only handle single precision, this engine
1399// can be used to map the data from double to single precision.
1400// <li> The double templated class
1401// <linkto class="RetypedArrayEngine:description">RetypedArrayEngine</linkto>
1402// converts the data from one data type to another with the possibility
1403// to reduce the number of dimensions. For example, it can be used to
1404// store a 2-d array of StokesVector objects as a 3-d array of floats
1405// by treating the 4 data elements as an extra array axis. If the
1406// StokesVector class is simple, it can be done very efficiently.
1407// <li> The class
1408// <linkto class="ForwardColumnEngine:description">
1409// ForwardColumnEngine</linkto>
1410// forwards the gets and puts on a row in a column to the same row
1411// in a column with the same name in another table. This provides
1412// a virtual copy of the referenced column.
1413// <li> The class
1414// <linkto class="ForwardColumnIndexedRowEngine:description">
1415// ForwardColumnIndexedRowEngine</linkto>
1416// is similar to <src>ForwardColumnEngine</src>.
1417// However, instead of forwarding it to the same row it uses
1418// a column to map its row number to a row number in the referenced
1419// table. In this way multiple rows can share the same data.
1420// This data manager only allows for get operations.
1421// <li> The calibration module has implemented a virtual column engine
1422// to do on-the-fly calibration in a transparent way.
1423// </ol>
1424// To handle arbitrary data types the templated abstract base class
1425// <linkto class="VSCEngine:description">VSCEngine</linkto>
1426// has been written. An example of how to use this class can be
1427// found in the demo program <src>dVSCEngine.cc</src>.
1428
1429// <ANCHOR NAME="Tables:LockSync">
1430// <h3>Table locking and synchronization</h3></ANCHOR>
1431//
1432// Multiple concurrent readers and writers (also via NFS) of a
1433// table are supported by means of a locking/synchronization mechanism.
1434// This mechanism is not very sophisticated in the sense that it is
1435// very coarsely grained. When locking, the entire table gets locked.
1436// A special lock file is used to lock the table. This lock file also
1437// contains some synchronization data.
1438// <p>
1439// Six ways of locking are supported (see class
1440// <linkto class=TableLock>TableLock</linkto>):
1441// <dl>
1442// <dt> TableLock::PermanentLocking(Wait)
1443// <dd> locks the table permanently (from open till close). This means
1444// that one writer OR multiple readers are possible.
1445// <dt> TableLock::AutoLocking
1446// <dd> does the locking automatically. This is the default mode.
1447// This mode makes it possible that a table is shared amongst
1448// processes without the user needing to write any special code.
1449// It also means that a lock is only released when needed.
1450// <dt> TableLock::AutoNoReadLocking
1451// <dd> is similar to AutoLocking. However, no lock is acquired when
1452// reading the table making it possible to read the table while
1453// another process holds a write-lock. It also means that for read
1454// purposes no automatic synchronization is done when the table is
1455// updated in another process.
1456// Explicit synchronization can be done by means of the function
1457// <src>Table::resync</src>.
1458// <dt> TableLock::UserLocking
1459// <dd> requires that the programmer explicitly acquires and releases
1460// a lock on the table. This makes some kind of transaction
1461// processing possible. E.g. set a write lock, add a row,
1462// write all data into the row and release the lock.
1463// The Table functions <src>lock</src> and <src>unlock</src>
1464// have to be used to acquire and release a (read or write) lock.
1465// <dt> TableLock::UserNoReadLocking
1466// <dd> is similar to UserLocking. However, similarly to AutoNoReadLocking
1467// no lock is needed to read the table.
1468// <dt> TableLock::NoLocking
1469// <dd> does not use table locking. It is the responsibility of the
1470// user to ensure that no concurrent access is done on the same
1471// bucket or tile in a storage manager, otherwise a table might
1472// get corrupted.
1473// <br>This mode is always used if Casacore is built with
1474// -DAIPS_TABLE_NOLOCKING.
1475// </dl>
1476// Synchronization of the processes accessing the same table is done
1477// by means of the lock file. When a lock is released, the storage
1478// managers flush their data into the table files. Some synchronization data
1479// is written into the lock file telling the new number of table rows
1480// and telling which storage managers have written data.
1481// This information is read when another process acquires the lock
1482// and is used to determine which storage managers have to refresh
1483// their internal caches.
1484// <br>Note that for the NoReadLocking modes (see above) explicit
1485// synchronization might be needed using <src>Table::resync</src>.
1486// <p>
1487// The function <src>Table::hasDataChanged</src> can be used to check
1488// if a table is (being) changed by another process. In this way
1489// a program can react to it. E.g. the table browser can refresh its
1490// screen when the underlying table is changed.
1491// <p>
1492// In general the default locking option will do.
1493// From the above it should be clear that heavy concurrent access
1494// results in a lot of flushing, thus will have a negative impact on
1495// performance. If uninterrupted access to a table is needed,
1496// the <src>PermanentLocking</src> option should be used.
1497// If transaction-like processing is done (e.g. updating a table
1498// containing an observation catalogue), the <src>UserLocking</src>
1499// option is probably best.
1500// <p>
1501// Creation or deletion of a table is not possible if that table
1502// is still open in another process. The function
1503// <src>Table::isMultiUsed()</src> can be used to check if a table
1504// is open in other processes.
1505// <br>
1506// The function <src>TableUtil::deleteTable</src> should be used to delete
1507// a table. Before deleting the table it ensures that it is writable
1508// and that it is not open in the current or another process.
1509// <p>
1510// The following example wants to read the table uninterrupted, thus it uses
1511// the <src>PermanentLocking</src> option. It also wants to wait
1512// until the lock is actually acquired.
1513// Note that the destructor closes the table and releases the lock.
1514// <srcblock>
1515// // Open the table (readonly).
1516// // Acquire a permanent (read) lock.
1517// // It waits until the lock is acquired.
1518// Table tab ("some.name",
1519// TableLock(TableLock::PermanentLockingWait));
1520// </srcblock>
1521//
1522// The following example uses the automatic locking.
1523// It tells the system to check about every 20 seconds if another
1524// process wants access to the table.
1525// <srcblock>
1526// // Open the table (readonly).
1527// Table tab ("some.name",
1528// TableLock(TableLock::AutoLocking, 20));
1529// </srcblock>
1530//
1531// The following example gets data (say from a GUI) and writes it
1532// as a row into the table. To lock the table as little as possible,
1533// the lock is acquired just before writing and released immediately
1534// thereafter.
1535// <srcblock>
1536// // Open the table (writable).
1537// Table tab ("some.name",
1538// TableLock(TableLock::UserLocking),
1539// Table::Update);
1540// while (True) {
1541// get input data
1542// tab.lock(); // Acquire a write lock and wait for it.
1543// tab.addRow();
1544// write data into the row
1545// tab.unlock(); // Release the lock.
1546// }
1547// </srcblock>
1548//
1549// The following example deletes a table if it is not used in
1550// another process.
1551// <srcblock>
1552// Table tab ("some.name");
1553// if (! tab.isMultiUsed()) {
1554// tab.markForDelete();
1555// }
1556// </srcblock>
1557
1558// <ANCHOR NAME="Tables:KeyLookup">
1559// <h3>Table lookup based on a key</h3></ANCHOR>
1560//
1561// Class <linkto class=ColumnsIndex>ColumnsIndex</linkto> offers the
1562// user a means to find the rows matching a given key or key range.
1563// It is a somewhat primitive replacement of a B-tree index and in the
1564// future it may be replaced by a proper B+-tree implementation.
1565// <p>
1566// The <src>ColumnsIndex</src> class makes it possible to build an
1567// in-core index on one or more columns. Looking up a key or key range
1568// is done using a binary search on that index. It returns a vector
1569// containing the row numbers of the rows matching the key (range).
1570// <p>
1571// The class is not capable of tracing changes in the underlying column(s).
1572// It detects a change in the number of rows and updates the index
1573// accordingly. However, it has to be told explicitly when a value
1574// in the underlying column(s) changes.
1575// <p>
1576// The following example shows how the class can be used.
1577// <example>
1578// Suppose one has an antenna table with key ANTENNA.
1579// <srcblock>
1580// // Open the table and make an index for column ANTENNA.
1581// Table tab("antenna.tab");
1582// ColumnsIndex colInx(tab, "ANTENNA");
1583// // Make a RecordFieldPtr for the ANTENNA field in the index key record.
1584// // Its data type has to match the data type of the column.
1585// RecordFieldPtr<Int> antFld(colInx.accessKey(), "ANTENNA");
1586// // Now loop in some way and find the row for the antenna
1587// // involved in that loop.
1588// Bool found;
1589// while (...) {
1590// // Fill the key field and get the row number.
1591// // ANTENNA is a unique key, so only one row number matches.
1592// // Otherwise function getRowNumbers had to be used.
1593// *antFld = antenna;
1594// uInt antRownr = colInx.getRowNumber (found);
1595// if (!found) {
1596// cout << "Antenna " << antenna << " is unknown" << endl;
1597// } else {
1598// // antRownr can now be used to get data from that row in
1599// // the antenna table.
1600// }
1601// }
1602// </srcblock>
1603// </example>
1604// <linkto class=ColumnsIndex>ColumnsIndex</linkto> itself contains a more
1605// advanced example. It shows how to use a private compare function
1606// to adjust the lookup if the index does not contain single
1607// key values, but intervals instead. This is useful if a row in
1608// a (sub)table is valid for, say, a time range instead of a single
1609// timestamp.
1610
1611// <ANCHOR NAME="Tables:performance">
1612// <h3>Performance and robustness considerations</h3></ANCHOR>
1613//
1614// CTDS resembles a database system, but it is not as robust.
1615// It lacks the transaction and logging facilities common to data base systems.
1616// It means that in case of a crash data might be lost.
1617// To reduce the risk of data loss to
1618// a minimum, it is advisable to regularly do a <tt>flush</tt>, optionally
1619// with an <tt>fsync</tt> to ensure that all data are really written.
1620// However, that can degrade the performance because it involves extra writes.
1621// So one should find the right balance between robustness and performance.
1622//
1623// To get a good feeling for the performance issues, it is important to
1624// understand some of the internals of CTDS.
1625// <br>The storage managers drive the performance. All storage managers use
1626// buckets (called tiles for the TiledStMan) which contain the data.
1627// All IO is done by bucket. The bucket/tile size is defined when creating
1628// the storage manager objects. Sometimes the default will do, but usually
1629// it is better to set it explicitly.
1630//
1631// It is best to do a flush when a tile is full.
1632// For example: <br>
1633// When creating a MeasurementSet containing N antennae (thus N*(N-1)/2 baselines
1634// or N*(N+1)/2 if auto-correlations are stored as well) it makes sense to
1635// store, say, N/2 rows in a tile and do a flush each time all baselines
1636// are written. In that way tiles are fully filled when doing the flush, so
1637// no extra IO is involved.
1638// <br>Here is some code showing this when creating a MeasurementSet.
1639// The code should speak for itself.
1640// <srcblock>
1641// MS* createMS (const String& msName, int nrchan, int nrant)
1642// {
1643// // Get the MS main default table description.
1644// TableDesc td = MS::requiredTableDesc();
1645// // Add the data column and its unit.
1646// MS::addColumnToDesc(td, MS::DATA, 2);
1647// td.rwColumnDesc(MS::columnName(MS::DATA)).rwKeywordSet().
1648// define("UNIT","Jy");
1649// // Store the DATA and FLAG column in two separate files.
1650// // In this way accessing FLAG only is much cheaper than
1651// // when combining DATA and FLAG.
1652// // All data have the same shape, thus use TiledColumnStMan.
1653// // Also store UVW with TiledColumnStMan.
1654// Vector<String> tsmNames(1);
1655// tsmNames[0] = MS::columnName(MS::DATA);
1656// td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
1657// td.defineHypercolumn("TiledData", 3, tsmNames);
1658// tsmNames[0] = MS::columnName(MS::FLAG);
1659// td.rwColumnDesc(tsmNames[0]).setShape (IPosition(2,itsNrCorr,itsNrFreq));
1660// td.defineHypercolumn("TiledFlag", 3, tsmNames);
1661// tsmNames[0] = MS::columnName(MS::UVW);
1662// td.defineHypercolumn("TiledUVW", 2, tsmNames);
1663// // Setup the new table.
1664// SetupNewTable newTab(msName, td, Table::New);
1665// // Most columns vary slowly and use the IncrStMan.
1666// IncrementalStMan incrStMan("ISMData");
1667// // A few columns use the StandardStMan (set an appropriate bucket size).
1668// StandardStMan stanStMan("SSMData", 32768);
1669// // Store all pol and freq and some rows in a single tile.
1670// // autocorrelations are written, thus in total there are
1671// // nrant*(nrant+1)/2 baselines. Ensure a baseline takes up an
1672// // integer number of tiles.
1673// TiledColumnStMan tiledData("TiledData",
1674// IPosition(3,4,nrchan,(nrant+1)/2));
1675// TiledColumnStMan tiledFlag("TiledFlag",
1676// IPosition(3,4,nrchan,8*(nrant+1)/2));
1677// TiledColumnStMan tiledUVW("TiledUVW",
1678// IPosition(2,3,nrant*(nrant+1)/2));
1679// newTab.bindAll (incrStMan);
1680// newTab.bindColumn(MS::columnName(MS::ANTENNA1),stanStMan);
1681// newTab.bindColumn(MS::columnName(MS::ANTENNA2),stanStMan);
1682// newTab.bindColumn(MS::columnName(MS::DATA),tiledData);
1683// newTab.bindColumn(MS::columnName(MS::FLAG),tiledFlag);
1684// newTab.bindColumn(MS::columnName(MS::UVW),tiledUVW);
1685// // Create the MS and its subtables.
1686// // Get access to its columns.
1687// MS* msp = new MeasurementSet(newTab);
1688// // Create all subtables.
1689// // Do this after the creation of optional subtables,
1690// // so the MS will know about those optional subtables.
1691// msp->createDefaultSubtables (Table::New);
1692// return msp;
1693// }
1694// </srcblock>
1695
1696// <h4>Some more performance considerations</h4>
1697// Which storage managers to use and how to use them depends heavily on
1698// the type of data and the access patterns to the data. Here follow some
1699// guidelines:
1700// <ol>
1701// <li> Scalar data can be stored with the StandardStMan (SSM) or
1702// IncrementalStMan (ISM). For slowly varying data (e.g. the TIME column
1703// in a MeasurementSet) it is best to use the ISM. Otherwise the SSM.
1704// Note that very long strings (longer than the bucketsize) can only
1705// be stored with the SSM.
1706// <li> Any number of storage managers can be used. In fact, each column
1707// can have a storage manager of its own resulting in column-wise
1708// stored data which is more and more used in data base systems.
1709// In that way a query or sort on that column is very fast, because
1710// the buckets to read only contain data of that column.
1711// In practice one can decide to combine a few frequently used columns
1712// in a storage manager.
1713// <li> Array data can be stored with any column manager. Small fixed size
1714// arrays can be stored directly with the SSM
1715// (or ISM if not changing much).
1716// However, they can also be stored with a TiledStMan (TSM) as shown
1717// for the UVW column in the example above.
1718// <br> Large arrays should usually be stored with a TSM. However,
1719// if it must be possible to change the shape of an array after it
1720// was stored, the SSM (or ISM) must be used. Note that in that
1721// case a lot of disk space can be wasted, because the SSM and ISM
1722// store the array data at the end of the file if the array got
1723// bigger and do not reuse the old space. The only way to
1724// reclaim it is by making a deep copy of the entire table.
1725// <li> If an array is stored with a TSM, it is important to decide
1726// which TSM to use.
1727// <ol>
1728// <li> The TiledColumnStMan is the most efficient, but only suitable
1729// for arrays having the same shape in the entire column.
1730// <li> The TiledShapeStMan is suitable for columns where the arrays
1731// can have a few shapes.
1732// <li> The TiledCellStMan is suitable for columns where the arrays
1733// can have many different shapes.
1734// </ol>
1735// This is discussed in more detail
1736// <a href="#Tables:TiledStMan">above</a>.
1737// <li> If storing an array with a TSM, it can be very important to
1738// choose the right tile shape. Not only does this define the size
1739// of a tile, but it also defines if access in other directions
1740// than the natural direction can be fast. It is also discussed in
1741// more detail <a href="#Tables:TiledStMan">above</a>.
1742// <li> Columns can be combined in a single TiledStMan. For instance, combining DATA
1743// and FLAG is advantageous if FLAG is always used with DATA. However, if FLAG
1744// is used on its own (e.g. in combination with CORRECTED_DATA), it is better
1745// to separate them, otherwise tiles containing FLAG also contain DATA making the
1746// tiles much bigger, thus more expensive to access.
1747// </ol>
1748//
1749// <ANCHOR NAME="Tables:iotracing">
1750// <h4>IO Tracing</h4></ANCHOR>
1751//
1752// Several forms of tracing can be done to see how the Table I/O performs.
1753// <ul>
1754// <li> On Linux/UNIX systems the <src>strace</src> command can be used to
1755// collect trace information about the physical IO.
1756// <li> The function <src>showCacheStatistics</src> in class
1757// TiledStManAccessor can be used to show the number of actual reads
1758// and writes and the percentage of cache hits.
1759// <li> The software has some options to trace the operations done on
1760// tables. It is possible to specify the columns and/or the operations
1761// to be traced. The following <src>aipsrc</src> variables can be used.
1762// <ul>
1763// <li> <src>table.trace.filename</src> specifies the file to write the
1764// trace output to. If not given or empty, no tracing will be done.
1765// The file name can contain environment variables or a tilde.
1766// <li> <src>table.trace.operation</src> specifies the operations to be
1767// traced. It is a string containing s, r, and/or w where
1768// s means tracing RefTable construction (selection/sort),
1769// r means column reads, and w means column writes.
1770// If empty, only the high level table operations (open, create, close)
1771// will be traced.
1772// <li> <src>table.trace.columntype</src> specifies the types of columns to
1773// be traced. It is a string containing the characters s, a, and/or r.
1774// s means all scalar columns, a all array columns, and r all record
1775// columns. If empty and if <src>table.trace.column</src> is empty,
1776// its default value is a.
1777// <li> <src>table.trace.column</src> specifies names of columns to be
1778// traced. Its value can be one or more glob-like patterns separated
1779// by commas without any whitespace. The default is empty.
1780// For example:
1781// <srcblock>
1782// table.trace.column: *DATA,FLAG,WEIGHT*
1783// </srcblock>
1784// to trace all DATA, the FLAG, and all WEIGHT columns.
1785// </ul>
1786// The trace output is a text file with the following columns
1787// separated by a space.
1788// <ul>
1789// <li> The UTC time the trace line was written (with msec accuracy).
1790// <li> The operation: n(ew), o(pen), c(lose), t(able), r(ead), w(rite),
1791// s(election/sort/iter), p(rojection).
1792// t means an arbitrary table operation as given in the name column.
1793// <li> The table-id (as t=i) given at table creation (new) or open.
1794// <li> The table name, column name, or table operation
1795// (as <src>*oper*</src>).
1796// <src>*reftable*</src> means that the operation is on a RefTable
1797// (thus result of selection, sort, projection, or iteration).
1798// <li> The row or rows to access (* means all rows).
1799// Multiple rows are given as a series of ranges like s:e:i,s:e:i,...
1800// where e and i are only given if applicable (default i is 1).
1801// Note that e is inclusive and defaults to s.
1802// <li> The optional array shape to access (none means scalar).
1803// In case multiple rows are accessed, the last shape value is the
1804// number of rows.
1805// <li> The optional slice of the array in each row as [start][end][stride].
1806// </ul>
1807// Shape, start, end, and stride are given in Fortran-order as
1808// [n1,n2,...].
1809// </ul>
1810
1811// <ANCHOR NAME="Tables:applications">
1812// <h4>Applications to inspect/manipulate a table</h4></ANCHOR>
1813// <ul>
1814// <li><em>showtableinfo</em> shows the structure of a table. It can show:
1815// <ul>
1816// <li> the columns and their format (optionally sorted on name)
1817// <li> the data managers used to store the column data
1818// <li> the table and/or column keywords and their values
1819// <li> recursively the same info of the subtables
1820// </ul>
1821// <li><em>showtablelock</em> shows if a table is locked or opened and by
1822// which process.
1823// <li><em>lsmf</em> shows the virtual files contained in a MultiFile.
1824// <li><em>tomf</em> copies the given files to a MultiFile.
1825// <li><em>taql</em> can be used to query a table using the
1826// <a href="../notes/199.html">Table Query Language</a> (TaQL).
1827// </ul>
1828//
1829// </synopsis>
1830// </module>
1831
1832
1833
1834} //# NAMESPACE CASACORE - END
1835
1836#endif
this file contains all the compiler specific defines
Definition: mainpage.dox:28