INGOR
|
General data container. More...
#include <util/ytData.h>
Public Member Functions | |
ytData * | ytData_new () |
Generates the empty ytData instance. | |
void | ytData_delete (ytData *this) |
Deletes the ytData instance. | |
int | ytData_numSamples (const ytData *this) |
Returns the number of samples (ytData::n). | |
int | ytData_numVariables (const ytData *this) |
Returns the number of variables (ytData::p). | |
const char * | ytData_typeName (int type) |
Returns the string expression of the type value. | |
void | ytData_stat (const ytData *this, FILE *fp, int level) |
Prints or checks data statistics. | |
void | ytData_print (ytData *this, FILE *fp) |
Prints the contents. | |
int | ytData_getType (const ytData *this, int j) |
Returns the type of the variable. | |
int | ytData_parseType (const char *name) |
Returns the type ID of the given type name. | |
const char * | ytData_getName (const ytData *this, int j) |
Returns the name of the variable. | |
const char * | ytData_getTypeName (const ytData *this, int j) |
Returns the string expression of the type of the specified variable. | |
int | ytData_findName (const ytData *this, const char *name) |
Returns the index of the variable of the specified name. | |
void | ytData_dynamic (ytData *this) |
Converts data for the dynamic model. | |
ytData * | ytData_dynamic2 (const ytData *this, int *T) |
Generates time expanded data. | |
ytData * | ytData_bootstrap (const ytData *this, ytRNG *rng, ytData *data) |
Performs the bootstrap resampling. | |
ytData * | ytData_pseudoBootstrap (const ytData *this, ytRNG *rng, int blocks, ytData *data) |
Performs the pseudo bootstrap resampling for dynamic data. | |
ytData * | ytData_pidBootstrap (const ytData *this, ytRNG *rng, int n, int F, ytData *data) |
Resampling primary IDs for the bootstrap method. | |
ytData * | ytData_listBootstrap (const ytData *this, ytRNG *rng, int n, ytArray *listSet, int F, ytData *data) |
Resampling lists of primary IDs for the bootstrap method. | |
ytArray * | ytData_readPrimaryIDList (const ytData *this, const char *file) |
void | ytData_extractRange (const ytData *this, ytKeyValues *kv) |
Extracts value ranges. | |
void | ytData_checkRange (const ytData *this, ytDoubleArray *xlar, ytDoubleArray *xrar) |
Checks if the range arrays are valid. | |
ytData * | ytData_hybrid (ytData *this, int N) |
Generates a new ytData instance for static-dynamic hybrid model. | |
ytData * | ytData_dehybrid (ytData *this, int N) |
De-hybridize time-extended static-dynamic hybrid data. | |
ytData * | ytData_dbn (ytData *this, int T) |
Converts data for the time-expanded DBN model. | |
ytArray * | ytData_collectPrimaryId (const ytData *this) |
Collects sample IDs with respect to the primary ID. | |
int | ytData_maxSecondaryId (const ytData *this) |
Returns the maximum secondary ID of the samples. | |
const ytStrArray * | ytData_getCategories (const ytData *this, int j) |
Returns the dictionary (categories) of the variable. | |
void | ytData_convertAllToReal (ytData *this) |
Converts all values to real values. | |
void | ytData_splitXY (ytData *this) |
Converts the data to explanatory/objective variable separated data. | |
ytData * | ytData_selectVars (const ytData *this, const ytStrArray *names) |
Selects variables by their names. | |
int | ytData_countNAN (const ytData *this) |
Counts up the number of NaNs. | |
void | ytData_MPI_Bcast (ytData **data, int root, MPI_Comm comm) |
Broadcasts the ytData instance with MPI. | |
Public Attributes | |
ytObject | obj |
int | n |
The number of samples. | |
int | p |
The number of variables. | |
double * | X |
n x p explanatory data matrix. | |
double * | Y |
n x p target data matrix. | |
ytStrArray * | names |
Names of variables. | |
ytIntArray * | types |
Value types of the variables. The j-th element represents the type ID of the j-th variable. The type ID is one of ytData_TYPE_REAL, ytData_TYPE_ORDINAL, ytData_TYPE_CATEGORICAL, and ytData_TYPE_DISCRETE. | |
ytKeyValues * | sampleAttrs |
attributes for samples. The value associated with the key is an array. The type of the array depends on the attributes. | |
ytKeyValues * | varAttrs |
attributes for variables. | |
ytArray * | dict |
dictionary for categories. The elements are ytStrArray instances, and the j-th element corresponds to the dictionary for the j-th variable. If the variable is not categorical, NULL needs to be set. | |
ytKeyValues * | meta |
meta data | |
General data container.
primaryid
secondaryid
Performs the bootstrap resampling.
Currently, only "primaryid" and "secondaryid" are set in ytData::sampleAttrs.
If the original data has these IDs, this sets the resampled corresponding IDs. If the original data does not have primary IDs, this sets the resampled index of samples as the primary IDs. If the original data does not have secondary IDs, this does not set them in the new data.
This does not set ytData::varAttrs and ytData::meta.
this | |
data | ytData instance where bootstrap results are stored. If NULL, new ytData instance is allocated and is returned. The instance needs to be the one returned by this function. |
rng |
void ytData_checkRange | ( | const ytData * | this, |
ytDoubleArray * | xlar, | ||
ytDoubleArray * | xrar ) |
Checks if the range arrays are valid.
If the ranges in the given arrays exceed the values in data, then this changes them.
void ytData_convertAllToReal | ( | ytData * | this | ) |
Converts all values to real values.
This converts the types of all variables to real (ytData_TYPE_REAL). The categorical values are converted to the integer values of the internal indices of the values.
this | ytData instance. |
int ytData_countNAN | ( | const ytData * | this | ) |
Counts up the number of NaNs.
This counts the number of NaNs only in ytData::X.
Converts data for the time-expanded DBN model.
This converts data with p × T variables to p variables with T samples. If the original data set has N samples, these are regarded as data for different primary IDs.
this | |
T |
De-hybridize time-extended static-dynamic hybrid data.
Note: The current implementation supports only N = 2.
this | |
N | depth |
void ytData_dynamic | ( | ytData * | this | ) |
Generates time expanded data.
This converts the given data to the time-expanded data where each time point (Secondary ID) of a variable is regarded as a different variable.
If the data contains T time points with p variables, then the new data with T × p variables is generated. Therefore, the new data will have P samples whereas the old data set has P × T samples where P represents the number of unique primary IDs.
Note that this assumes that each primary ID has the same lengths of time points (secondary IDs).
New sample attributes for P samples are taken from the old ones at the first time point (secondary ID) of the particular primary IDs.
P is identical to the number of samples in the new data set.
[in] | this | ytData instance to convert. |
[out] | T | the number of time points (secondary IDs). |
void ytData_extractRange | ( | const ytData * | this, |
ytKeyValues * | kv ) |
Extracts value ranges.
This extracts the minimum and maximum values for each variable, and stores them as ytDoubleArray instances. The arrays are set in the given ytKeyValues instance as values with keys "xl" and "xr".
Note that this does not consider extra outer regions. This simply searches for the max and min of each variable.
This is to fix parent value ranges for B-spline modeling when bootstrapping.
int ytData_findName | ( | const ytData * | this, |
const char * | name ) |
Returns the index of the variable of the specified name.
const char * ytData_getName | ( | const ytData * | this, |
int | j ) |
Returns the name of the variable.
If the name is not set, this returns NULL.
this | |
j | index of variable. |
int ytData_getType | ( | const ytData * | this, |
int | j ) |
Returns the type of the variable.
this | |
j | index |
Generates a new ytData instance for static-dynamic hybrid model.
The order of the new data set is t=0, t=-1, ..., t=-N.
N | Specifies that the new data is generated with T-N to T-0. |
ytData * ytData_listBootstrap | ( | const ytData * | this, |
ytRNG * | rng, | ||
int | n, | ||
ytArray * | listSet, | ||
int | F, | ||
ytData * | data ) |
Resampling lists of primary IDs for the bootstrap method.
listSet | ytArray instance containing ytIntArray instances as its elements defining lists of primary IDs. |
F | If true, checks the consistency of the length of the secondary IDs and the list lengths. |
int ytData_maxSecondaryId | ( | const ytData * | this | ) |
Returns the maximum secondary ID of the samples.
Note: This returns the maximum value of the internal, predefined secondaryid
sample attributes.
this | ytData instance. |
secondaryid
sample attributes. If the attribute does not exist, 0 is returned. void ytData_MPI_Bcast | ( | ytData ** | data, |
int | root, | ||
MPI_Comm | comm ) |
int ytData_parseType | ( | const char * | name | ) |
Returns the type ID of the given type name.
name |
Resampling primary IDs for the bootstrap method.
The new "bootstrapped" ytData instance contains only three sample attributes: "primaryid", "secondaryid", and "orig_primaryid".
"orig_primaryid" keeps track of the original primary IDs.
this | |
rng | |
n | Number of IDs to resample. If 0, the same number as in the original data set is used (the number of primary IDs). |
F | If true, checks the consistency of the length of the secondary IDs. |
data | If NULL, the new ytData instance is generated and returned. |
Performs the pseudo bootstrap resampling for dynamic data.
Reads a primary ID list file.
Each line defines a list of sample names; these samples are resampled together in the "list" bootstrap mode.
ytData * ytData_selectVars | ( | const ytData * | this, |
const ytStrArray * | names ) |
Selects variables by their names.
If the given node is not found in the original ytData instance, this only outputs a warning message. Check the number of variables in the returned instance yourself if you want to know whether this happened.
void ytData_splitXY | ( | ytData * | this | ) |
Converts the data to explanatory/objective variable separated data.
This assumes that the given data set consists of different sets of samples for explanatory and objective variables, and regards the first half of the samples (rows) as those of the explanatory variables and the second half as those of the objective variables. Thus, the number of samples of the given data is halved after applying this routine.
this |
void ytData_stat | ( | const ytData * | this, |
FILE * | fp, | ||
int | level ) |
Prints or checks data statistics.
level | 0 - only warning. |
const char * ytData_typeName | ( | int | type | ) |
Returns the string expression of the type value.
type | type value returned by ytData_getType(). |
double* ytData::X |
n x p explanatory data matrix.
This is a column major matrix. The (i, j) element of X can be accessed by X[i + j * n] where n is the number of samples. Here columns represent variables and rows represent samples.
The values are not only doubles but also integers or indices. The type of the variable is stored in the types field.
double* ytData::Y |