casacore
UDFBase.h
Go to the documentation of this file.
1 //# UDFBase.h: Abstract base class for a user-defined TaQL function
2 //# Copyright (C) 2010
3 //# Associated Universities, Inc. Washington DC, USA.
4 //#
5 //# This library is free software; you can redistribute it and/or modify it
6 //# under the terms of the GNU Library General Public License as published by
7 //# the Free Software Foundation; either version 2 of the License, or (at your
8 //# option) any later version.
9 //#
10 //# This library is distributed in the hope that it will be useful, but WITHOUT
11 //# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 //# FITNESS FOR A PARTICULAR PURPOSE. See the GNU Library General Public
13 //# License for more details.
14 //#
15 //# You should have received a copy of the GNU Library General Public License
16 //# along with this library; if not, write to the Free Software Foundation,
17 //# Inc., 675 Massachusetts Ave, Cambridge, MA 02139, USA.
18 //#
19 //# Correspondence concerning AIPS++ should be addressed as follows:
20 //# Internet email: aips2-request@nrao.edu.
21 //# Postal address: AIPS++ Project Office
22 //# National Radio Astronomy Observatory
23 //# 520 Edgemont Road
24 //# Charlottesville, VA 22903-2475 USA
25 //#
26 //# $Id$
27 
28 #ifndef TABLES_UDFBASE_H
29 #define TABLES_UDFBASE_H
30 
31 //# Includes
32 #include <casacore/casa/aips.h>
33 #include <casacore/tables/TaQL/ExprNodeRep.h>
34 #include <casacore/tables/Tables/Table.h>
35 #include <casacore/tables/TaQL/TaQLStyle.h>
36 #include <casacore/casa/Containers/Block.h>
37 #include <casacore/casa/OS/Mutex.h>
38 #include <casacore/casa/stdmap.h>
39 
40 
41 namespace casacore {
42 
43  // <summary>
44  // Abstract base class for a user-defined TaQL function
45  // </summary>
46  //
47  // <synopsis>
48  // This class makes it possible to add user-defined functions (UDF) to TaQL.
49  // A UDF has to be implemented in a class derived from this class and can
50  // contain one or more user-defined functions.
51  // <br>A few functions have to be implemented in the class as described below.
52  // In this way TaQL can be extended with arbitrary functions, which can be
53  // normal functions as well as aggregate functions (often used with GROUPBY).
54  //
55  // A UDF is a class derived from this base class. It must contain the
56  // following member functions. See also the example below.
57  // <table border=0>
58  // <tr>
59  // <td><src>makeObject</src></td>
60  // <td>a static function to create an object of the UDF class. This function
61  // needs to be registered.
62  // </td>
63  // </tr>
64  // <tr>
65  // <td><src>setup</src></td>
66  // <td>this virtual function is called after the object has been created.
67  // It should initialize the object using the function arguments that
68  // can be obtained using the function <src>operands()</src>. The setup
69  // function should perform the following:
70  // <ul>
71 // <li>Define the data type of the result using <src>setDataType</src>.
72  // The data type should be derived from the data types of the function
73  // arguments. The possible data types are defined in class
74  // TableExprNodeRep.
75  // Note that a UDF can support multiple data types. For example, a
76  // function like <src>min</src> can be used for Int, Double, or a mix.
77  // Function 'checkDT' in class TableExprNodeMulti can be used to
78  // check the data types of the operands and determine the result
79  // data type.
80  // <li>Define if the function is an aggregate function calculating
81  // an aggregated value in a group (e.g., minimum or mean).
82  // <src>setAggregate</src> can be used to tell so.
83  // <li>Define the dimensionality of the result using <src>setNDim</src>.
84  // A value of 0 means a scalar. A value of -1 means an array with
85  // a dimensionality that can vary from row to row.
86  // <li>Optionally use <src>setShape</src> to define the shape if the
87  // results are arrays with a shape that is the same for all rows.
88  // It will also set ndim if setNDim was not used yet, otherwise
89 // it checks if ndim matches.
90  // <li>Optionally set the unit of the result using <src>setUnit</src>.
91  // TaQL has full support of units, so UDFs should behave the same.
92  // It is possible to change the unit of the function arguments.
93  // For example:
94  // <ul>
95  // <li>a function like 'sin' can force its argument to be
96  // in radians; TaQL will scale the argument as needed. This can be
97  // done like
98  // <src>TableExprNodeUnit::adaptUnit (operands()[i], "rad");</src>
99  // <li>A function like 'asin' will have a result in radians.
100  // Such a UDF should set its result unit to rad.
101  // <li>A function like 'min' wants its arguments to have the same
102  // unit and will set its result unit to it. It can be done like:
103  // <src>setUnit (TableExprFuncNode::makeEqualUnits
104  // (operands(), 0, operands().size()));</src>
105  // </ul>
106  // See class TableExprFuncNode for more info about these functions.
107  // <li>Optionally define if the result is a constant value using
108  // <src>setConstant</src>. It means that the function is not
109  // dependent on the row number in the table being queried.
110  // This is usually the case if all UDF arguments are constant.
111  // </ul>
112  // </td>
113  // </tr>
114  // <tr>
115  // <td><src>getXXX</src></td>
116  // <td>these are virtual get functions for each possible data type. The
117  // get functions matching the data types set by the setup
118  // function need to be implemented.
119  // The <src>get</src> functions have an argument TableExprId
120  // defining the table row (or record) for which the function has
121  // to be evaluated.
122 // If the UDF is an aggregate function, the TableExprId has to be
123 // cast to a TableExprIdAggr object from which all TableExprId
124  // objects in an aggregation group can be retrieved.
125  // <srcblock>
126  // const TableExprIdAggr& aid = TableExprIdAggr::cast (id);
127  // const vector<TableExprId>& ids = aid.result().ids(id.rownr());
128  // </srcblock>
129  // </td>
130  // </tr>
131  // </table>
132  //
133  // A UDF has to be made known to TaQL by adding it to the UDF registry with
134  // its name and 'makeObject' function.
135  // UDFs will usually reside in a shared library that is loaded dynamically.
136  // TaQL will load a UDF in the following way:
137  // <ul>
138  // <li> The UDF name used in TaQL consists of two parts: a library name
139  // and a function name separated by a dot. Both parts need to be given.
140  // Note that the library name can also be seen as a UDF scope, so
141  // different UDFs with equal names can be used from different libraries.
142  // A UDF should be registered with this full name.
143  // <br>The "USING STYLE" clause can be used to define a synonym for
144  // a (long) library name in the TaQLStyle object. The library part
145  // of the UDF will always be looked up in this synonym map.
146  // <li> If a UDF is not found in the registry, it will be tried to load
147  // a shared library using the library name part. The libraries tried
148  // to be loaded are lib<library>.so and libcasa_<library>.so.
149  // On Mac .dylib will be tried. If loaded successfully, a special
150  // function 'register_libname' will be called first. It should
151 // register each UDF in the shared library using UDFBase::registerUDF.
152  // </ul>
153  // </synopsis>
154  //
155  // <example>
156 // The following example shows a normal UDF function.
157  // <br>It returns True if the function argument matches 1.
158  // It can be seen that it checks if the argument is an integer scalar.
159  // <srcblock>
160  // class TestUDF: public UDFBase
161  // {
162  // public:
163  // TestUDF() {}
164  // // Registered function to create the UDF object.
165  // // The name of the function is not important here.
166  // static UDFBase* makeObject (const String&)
167  // { return new TestUDF(); }
168  // // Setup and check the details; result is a bool scalar value.
169  // virtual void setup (const Table&, const TaQLStyle&)
170  // {
171  // AlwaysAssert (operands().size() == 1, AipsError);
172  // AlwaysAssert (operands()[0]->dataType() == TableExprNodeRep::NTInt,
173  // AipsError);
174  // AlwaysAssert (operands()[0]->valueType() == TableExprNodeRep::VTScalar,
175  // AipsError);
176  // setDataType (TableExprNodeRep::NTBool);
177  // setNDim (0); // scalar result
178 // setConstant (operands()[0]->isConstant()); // constant result?
179  // }
180  // // Get the value for the given id.
181  // // It gets the value of the operand and checks if it is 1.
182  // Bool getBool (const TableExprId& id)
183  // { return operands()[0]->getInt(id) == 1; }
184  // };
185  // </srcblock>
186  // </example>
187 
188  // <example>
189  // The following example shows an aggregate UDF function.
190  // It calculates the sum of the cubes of the values in a group.
191  // <srcblock>
192  // class TestUDFAggr: public UDFBase
193  // {
194  // public:
195  // TestUDFAggr() {}
196  // // Registered function to create the UDF object.
197  // // The name of the function is not important here.
198  // static UDFBase* makeObject (const String&) { return new TestUDFAggr(); }
199  // // Setup and check the details; result is an integer scalar value.
200  // // It aggregates the values of multiple rows.
201  // virtual void setup (const Table&, const TaQLStyle&)
202  // {
203  // AlwaysAssert (operands().size() == 1, AipsError);
204  // AlwaysAssert (operands()[0]->dataType() == TableExprNodeRep::NTInt, AipsError);
205  // AlwaysAssert (operands()[0]->valueType() == TableExprNodeRep::VTScalar, AipsError);
206  // setDataType (TableExprNodeRep::NTInt);
207  // setNDim (0); // scalar
208  // setAggregate (True); // aggregate function
209  // }
210  // // Get the value of a group.
211  // // It aggregates the values of multiple rows.
212  // Int64 getInt (const TableExprId& id)
213  // {
214  // // Cast the id to a TableExprIdAggr object.
215  // const TableExprIdAggr& aid = TableExprIdAggr::cast (id);
216  // // Get the vector of ids for this group.
217 // const vector<TableExprId>& ids = aid.result().ids(id.rownr());
218  // // Get the values for all ids and accumulate them.
219  // Int64 sum3 = 0;
220  // for (vector<TableExprId>::const_iterator it=ids.begin();
221  // it!=ids.end(); ++it){
222  // Int64 v = operands()[0]->getInt(*it);
223  // sum3 += v*v*v;
224  // }
225  // return sum3;
226  // }
227  // };
228  // </srcblock>
229  // </example>
230  // More examples of UDF functions can be found in classes UDFMSCal
231  // and DirectionUDF.
232 
233  class UDFBase
234  {
235  public:
236  // The signature of a global or static member function creating an object
237  // of the UDF.
238  typedef UDFBase* MakeUDFObject (const String& functionName);
239 
240  // Only default constructor is needed.
241  UDFBase();
242 
243  // Destructor.
244  virtual ~UDFBase();
245 
246  // Evaluate the function and return the result.
247  // Their default implementations throw a "not implemented" exception.
248  // <group>
249  virtual Bool getBool (const TableExprId& id);
250  virtual Int64 getInt (const TableExprId& id);
251  virtual Double getDouble (const TableExprId& id);
252  virtual DComplex getDComplex (const TableExprId& id);
253  virtual String getString (const TableExprId& id);
254  virtual TaqlRegex getRegex (const TableExprId& id);
255  virtual MVTime getDate (const TableExprId& id);
256  virtual Array<Bool> getArrayBool (const TableExprId& id);
257  virtual Array<Int64> getArrayInt (const TableExprId& id);
258  virtual Array<Double> getArrayDouble (const TableExprId& id);
259  virtual Array<DComplex> getArrayDComplex (const TableExprId& id);
260  virtual Array<String> getArrayString (const TableExprId& id);
261  virtual Array<MVTime> getArrayDate (const TableExprId& id);
262  // </group>
263 
264  // Get the unit.
265  const String& getUnit() const
266  { return itsUnit; }
267 
268  // Get the nodes in the function operands representing an aggregate function.
269  void getAggrNodes (vector<TableExprNodeRep*>& aggr);
270 
271  // Get the nodes in the function operands representing a table column.
272  void getColumnNodes (vector<TableExprNodeRep*>& cols);
273 
274  private:
275  // Set up the function object.
276  virtual void setup (const Table& table,
277  const TaQLStyle&) = 0;
278 
279  protected:
280  // Get the operands.
282  { return itsOperands; }
283 
284  // Set the data type.
285  // This function must be called by the setup function of the derived class.
287 
288  // Set the dimensionality of the results.
289  // <br> 0 means that the results are scalars.
290  // <br> -1 means that the results are arrays with unknown dimensionality.
291  // <br> >0 means that the results are arrays with that dimensionality.
292  // This function must be called by the setup function of the derived class.
293  void setNDim (Int ndim);
294 
295  // Set the shape of the results if it is fixed and known.
296  void setShape (const IPosition& shape);
297 
298  // Set the unit of the result.
299  // If this function is not called by the setup function of the derived
300  // class, the result has no unit.
301  void setUnit (const String& unit);
302 
303  // Define if the result is constant (e.g. if all arguments are constant).
304  // If this function is not called by the setup function of the derived
305  // class, the result is not constant.
306  void setConstant (Bool isConstant);
307 
308  // Define if the UDF is an aggregate function (usually used in GROUPBY).
310 
311  // Let a derived class recreate its column objects in case a selection
312  // has to be applied.
313  // The default implementation does nothing.
314  virtual void recreateColumnObjects (const Vector<uInt>& rownrs);
315 
316  public:
317  // Register the name and construction function of a UDF (thread-safe).
318  // An exception is thrown if this name already exists with a different
319  // construction function.
320  static void registerUDF (const String& name, MakeUDFObject* func);
321 
322  // Initialize the function object.
323  void init (const PtrBlock<TableExprNodeRep*>& arg,
324  const Table& table, const TaQLStyle&);
325 
326  // Get the data type.
328  { return itsDataType; }
329 
330  // Get the dimensionality of the results.
331  // (0=scalar, -1=array with variable ndim, >0=array with fixed ndim
332  Int ndim() const
333  { return itsNDim; }
334 
335  // Get the result shape if the same for all results.
336  const IPosition& shape() const
337  { return itsShape; }
338 
339  // Tell if the UDF gives a constant result.
340  Bool isConstant() const
341  { return itsIsConstant; }
342 
343  // Tell if the UDF is an aggregate function.
345  { return itsIsAggregate; }
346 
347  // Do not apply the selection.
349  { itsApplySelection = False; }
350 
351  // If needed, let the UDF re-create column objects for a selection of rows.
352  // It calls the function recreateColumnObjects.
353  void applySelection (const Vector<uInt>& rownrs);
354 
355  // Create a UDF object (thread-safe).
356  // It looks in the map with fixed function names. If unknown,
357  // it looks if a wildcarded function name is supported (for PyTaQL).
358  static UDFBase* createUDF (const String& name, const TaQLStyle& style);
359 
360  private:
361  //# Data members.
370  //# The registry is used for two purposes:
371  //# 1. It is a map of known function names (lib.func) to funcptr.
372  //# Function name * means that the library can contain any function,
373  //# which is intended for python functions (through PyTaQL).
374  //# 2. The loaded libraries are kept in the map (with 0 funcptr).
375  static map<String, MakeUDFObject*> theirRegistry;
377  };
378 
379 } // end namespace
380 
381 #endif
A Vector of integers, for indexing into Array<T> objects.
Definition: IPosition.h:119
void setUnit(const String &unit)
Set the unit of the result.
virtual MVTime getDate(const TableExprId &id)
long long Int64
Define the extra non-standard types used by Casacore (like proposed uSize, Size)
Definition: aipsxtype.h:38
int Int
Definition: aipstype.h:47
LatticeExprNode arg(const LatticeExprNode &expr)
void setNDim(Int ndim)
Set the dimensionality of the results.
Bool itsIsAggregate
Definition: UDFBase.h:368
Main interface class to a read/write table.
Definition: Table.h:149
TableExprNodeRep::NodeDataType itsDataType
Definition: UDFBase.h:363
PtrBlock< TableExprNodeRep * > & operands()
Get the operands.
Definition: UDFBase.h:281
virtual Array< String > getArrayString(const TableExprId &id)
UDFBase()
Only default constructor is needed.
void setAggregate(Bool isAggregate)
Define if the UDF is an aggregate function (usually used in GROUPBY).
virtual Bool getBool(const TableExprId &id)
Evaluate the function and return the result.
Abstract base class for a user-defined TaQL function.
Definition: UDFBase.h:233
virtual Array< DComplex > getArrayDComplex(const TableExprId &id)
Bool isAggregate() const
Tell if the UDF is an aggregate function.
Definition: UDFBase.h:344
NodeDataType
Define the data types of a node.
Definition: ExprNodeRep.h:154
virtual Array< MVTime > getArrayDate(const TableExprId &id)
const String & getUnit() const
Get the unit.
Definition: UDFBase.h:265
TableExprNodeRep::NodeDataType dataType() const
Get the data type.
Definition: UDFBase.h:327
static map< String, MakeUDFObject * > theirRegistry
Definition: UDFBase.h:375
void getAggrNodes(vector< TableExprNodeRep * > &aggr)
Get the nodes in the function operands representing an aggregate function.
void disableApplySelection()
Do not apply the selection.
Definition: UDFBase.h:348
String itsUnit
Definition: UDFBase.h:366
Int ndim() const
Get the dimensionality of the results.
Definition: UDFBase.h:332
const IPosition & shape() const
Get the result shape if the same for all results.
Definition: UDFBase.h:336
PtrBlock< TableExprNodeRep * > itsOperands
Definition: UDFBase.h:362
double Double
Definition: aipstype.h:52
void getColumnNodes(vector< TableExprNodeRep * > &cols)
Get the nodes in the function operands representing a table column.
Bool isConstant() const
Tell if the UDF gives a constant result.
Definition: UDFBase.h:340
Class with static members defining the TaQL style.
Definition: TaQLStyle.h:64
void setConstant(Bool isConstant)
Define if the result is constant (e.g. if all arguments are constant).
virtual ~UDFBase()
Destructor.
bool Bool
Define the standard types used by Casacore.
Definition: aipstype.h:39
UDFBase * MakeUDFObject(const String &functionName)
The signature of a global or static member function creating an object of the UDF.
Definition: UDFBase.h:238
virtual void recreateColumnObjects(const Vector< uInt > &rownrs)
Let a derived class recreate its column objects in case a selection has to be applied.
virtual Int64 getInt(const TableExprId &id)
static Mutex theirMutex
Definition: UDFBase.h:376
const Bool False
Definition: aipstype.h:41
A drop-in replacement for Block<T*>.
Definition: Block.h:861
Bool itsApplySelection
Definition: UDFBase.h:369
Wrapper around a pthreads mutex.
Definition: Mutex.h:49
Bool itsIsConstant
Definition: UDFBase.h:367
IPosition itsShape
Definition: UDFBase.h:365
void setDataType(TableExprNodeRep::NodeDataType)
Set the data type.
void setShape(const IPosition &shape)
Set the shape of the results if it is fixed and known.
The identification of a TaQL selection subject.
Definition: TableExprId.h:98
Class to handle a Regex or StringDistance.
Definition: ExprNodeRep.h:74
void init(const PtrBlock< TableExprNodeRep * > &arg, const Table &table, const TaQLStyle &)
Initialize the function object.
String: the storage and methods of handling collections of characters.
Definition: String.h:223
static void registerUDF(const String &name, MakeUDFObject *func)
Register the name and construction function of a UDF (thread-safe).
virtual Array< Bool > getArrayBool(const TableExprId &id)
virtual DComplex getDComplex(const TableExprId &id)
virtual String getString(const TableExprId &id)
Class to handle date/time type conversions and I/O.
Definition: MVTime.h:266
static UDFBase * createUDF(const String &name, const TaQLStyle &style)
Create a UDF object (thread-safe).
virtual Double getDouble(const TableExprId &id)
virtual TaqlRegex getRegex(const TableExprId &id)
virtual void setup(const Table &table, const TaQLStyle &)=0
Set up the function object.
this file contains all the compiler specific defines
Definition: mainpage.dox:28
virtual Array< Int64 > getArrayInt(const TableExprId &id)
void applySelection(const Vector< uInt > &rownrs)
If needed, let the UDF re-create column objects for a selection of rows.
virtual Array< Double > getArrayDouble(const TableExprId &id)