00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #include "cmtkHypothesisTests.h"
00034
00035 #include <Base/cmtkVector3D.h>
00036 #include <Base/cmtkMathUtil.h>
00037 #include <System/cmtkConsole.h>
00038
00039 #include <vector>
00040
00041 namespace
00042 cmtk
00043 {
00044
00047
00048 TypedArray::SmartPtr
00049 HypothesisTests::GetUnpairedTwoTailedTTest
00050 ( std::vector<TypedArray::SmartPtr>& dataX,
00051 std::vector<TypedArray::SmartPtr>& dataY,
00052 TypedArray::SmartPtr* tstatData, TypedArray::SmartPtr* avgXData,
00053 TypedArray::SmartPtr* avgYData, const TypedArray* mask )
00054 {
00055 const unsigned int length = dataX[0]->GetDataSize();
00056
00057 TypedArray::SmartPtr probData = TypedArray::Create( TYPE_FLOAT, length );
00058
00059 if ( tstatData )
00060 *tstatData = TypedArray::Create( TYPE_FLOAT, length );
00061
00062 if ( avgXData )
00063 *avgXData = TypedArray::Create( TYPE_FLOAT, length );
00064
00065 if ( avgYData )
00066 *avgYData = TypedArray::Create( TYPE_FLOAT, length );
00067
00068 const unsigned int dataXsize = dataX.size();
00069 std::vector<Types::DataItem> valuesX( dataXsize );
00070 const unsigned int dataYsize = dataY.size();
00071 std::vector<Types::DataItem> valuesY( dataYsize );
00072
00073 Types::DataItem t = 0, prob = 0, avgX = 0, avgY = 0;
00074 for ( unsigned int idx = 0; idx < length; ++idx ) {
00075
00076 Types::DataItem maskValue;
00077 if ( !mask || (mask->Get( maskValue, idx ) && (maskValue != 0)) )
00078 {
00079 unsigned int actualSizeX = 0;
00080 for ( unsigned int i = 0; i < dataXsize; ++i )
00081 if ( dataX[i]->Get( valuesX[actualSizeX], idx ) ) ++actualSizeX;
00082
00083 unsigned int actualSizeY = 0;
00084 for ( unsigned int i = 0; i < dataYsize; ++i )
00085 if ( dataY[i]->Get( valuesY[actualSizeY], idx ) ) ++actualSizeY;
00086
00087 if ( actualSizeX && actualSizeY )
00088 {
00089 prob = MathUtil::TTest<Types::DataItem>( valuesX, valuesY, t, avgX, avgY );
00090
00091 if ( (prob < 0) || (prob>1) )
00092 {
00093 fprintf( stderr, "t = %f\tp = %f\n", t, prob );
00094 }
00095 prob = 1.0 - prob;
00096 }
00097 else
00098 {
00099 t = prob = 0;
00100 }
00101
00102 if ( tstatData ) (*tstatData)->Set( t, idx );
00103 if ( avgXData ) (*avgXData)->Set( avgX, idx );
00104 if ( avgYData ) (*avgYData)->Set( avgY, idx );
00105
00106 if ( avgX > avgY )
00107 probData->Set( prob, idx );
00108 else
00109 probData->Set( -prob, idx );
00110 }
00111 else
00112 {
00113 probData->SetPaddingAt( idx );
00114 if ( tstatData ) (*tstatData)->SetPaddingAt( idx );
00115 if ( avgXData ) (*avgXData)->SetPaddingAt( idx );
00116 if ( avgYData ) (*avgYData)->SetPaddingAt( idx );
00117 }
00118 }
00119
00120 return probData;
00121 }
00122
00123 TypedArray::SmartPtr
00124 HypothesisTests::GetPairedTwoTailedTTest
00125 ( std::vector<TypedArray::SmartPtr>& dataX,
00126 std::vector<TypedArray::SmartPtr>& dataY,
00127 TypedArray::SmartPtr* tstatData, TypedArray::SmartPtr* avgXData,
00128 TypedArray::SmartPtr* avgYData, const TypedArray* mask )
00129 {
00130 if ( dataX.size() != dataY.size() )
00131 {
00132 throw( Exception( "Cannot perform paired t-test if numbers of X and Y samples isn't equal" ) );
00133 }
00134
00135 const unsigned int length = dataX[0]->GetDataSize();
00136
00137 TypedArray::SmartPtr probData = TypedArray::Create( TYPE_FLOAT, length );
00138
00139 if ( tstatData )
00140 *tstatData = TypedArray::Create( TYPE_FLOAT, length );
00141
00142 if ( avgXData )
00143 *avgXData = TypedArray::Create( TYPE_FLOAT, length );
00144
00145 if ( avgYData )
00146 *avgYData = TypedArray::Create( TYPE_FLOAT, length );
00147
00148 const unsigned int dataXsize = dataX.size();
00149 std::vector<Types::DataItem> valuesX( dataXsize );
00150 const unsigned int dataYsize = dataY.size();
00151 std::vector<Types::DataItem> valuesY( dataYsize );
00152
00153 Types::DataItem t = 0, prob = 0, avgX = 0, avgY = 0;
00154 for ( unsigned int idx = 0; idx < length; ++idx ) {
00155
00156 Types::DataItem maskValue;
00157 if ( !mask || (mask->Get( maskValue, idx ) && (maskValue != 0)) )
00158 {
00159 valuesX.resize( dataXsize );
00160 unsigned int actualSizeX = 0;
00161 for ( unsigned int i = 0; i < dataXsize; ++i )
00162 if ( dataX[i]->Get( valuesX[actualSizeX], idx ) ) ++actualSizeX;
00163
00164 valuesY.resize( dataYsize );
00165 unsigned int actualSizeY = 0;
00166 for ( unsigned int i = 0; i < dataYsize; ++i )
00167 if ( dataY[i]->Get( valuesY[actualSizeY], idx ) ) ++actualSizeY;
00168
00169 if ( actualSizeX == actualSizeY )
00170 {
00171 valuesX.resize( actualSizeX );
00172 valuesY.resize( actualSizeY );
00173
00174 prob = MathUtil::PairedTTest<Types::DataItem>( valuesX, valuesY, t, avgX, avgY );
00175
00176 if ( (prob < 0) || (prob>1) )
00177 {
00178 fprintf( stderr, "t = %f\tp = %f\n", t, prob );
00179 }
00180 prob = 1.0 - prob;
00181 }
00182 else
00183 {
00184 t = prob = 0;
00185 }
00186
00187 if ( tstatData ) (*tstatData)->Set( t, idx );
00188 if ( avgXData ) (*avgXData)->Set( avgX, idx );
00189 if ( avgYData ) (*avgYData)->Set( avgY, idx );
00190
00191 if ( avgX > avgY )
00192 probData->Set( prob, idx );
00193 else
00194 probData->Set( -prob, idx );
00195 }
00196 else
00197 {
00198 probData->SetPaddingAt( idx );
00199 if ( tstatData ) (*tstatData)->SetPaddingAt( idx );
00200 if ( avgXData ) (*avgXData)->SetPaddingAt( idx );
00201 if ( avgYData ) (*avgYData)->SetPaddingAt( idx );
00202 }
00203 }
00204
00205 return probData;
00206 }
00207
00208 TypedArray::SmartPtr
00209 HypothesisTests::GetPairedCorrelation
00210 ( std::vector<TypedArray::SmartPtr>& dataX, std::vector<TypedArray::SmartPtr>& dataY, TypedArray::SmartPtr* pData, const TypedArray* mask )
00211 {
00212 if ( dataX.size() != dataY.size() )
00213 {
00214 throw( Exception( "Cannot perform paired correlation if numbers of X and Y samples isn't equal" ) );
00215 }
00216
00217 const unsigned int length = dataX[0]->GetDataSize();
00218
00219 TypedArray::SmartPtr correlationData = TypedArray::Create( TYPE_FLOAT, length );
00220 if ( pData )
00221 *pData = TypedArray::Create( TYPE_FLOAT, length );
00222
00223 const unsigned int dataXsize = dataX.size();
00224 std::vector<Types::DataItem> valuesX( dataXsize );
00225 const unsigned int dataYsize = dataY.size();
00226 std::vector<Types::DataItem> valuesY( dataYsize );
00227
00228 for ( unsigned int idx = 0; idx < length; ++idx )
00229 {
00230 correlationData->SetPaddingAt( idx );
00231 if ( pData )
00232 (*pData)->SetPaddingAt( idx );
00233
00234 Types::DataItem maskValue;
00235 if ( !mask || (mask->Get( maskValue, idx ) && (maskValue != 0)) )
00236 {
00237 valuesX.resize( dataXsize );
00238 valuesY.resize( dataXsize );
00239
00240 unsigned int actualSize = 0;
00241 for ( unsigned int i = 0; i < dataXsize; ++i )
00242 if ( dataX[i]->Get( valuesX[actualSize], idx ) && dataY[i]->Get( valuesY[actualSize], idx ) )
00243 ++actualSize;
00244
00245 if ( actualSize )
00246 {
00247 valuesX.resize( actualSize );
00248 valuesY.resize( actualSize );
00249
00250 Types::DataItem corr = MathUtil::Correlation<Types::DataItem>( valuesX, valuesY );
00251 correlationData->Set( corr, idx );
00252 if ( pData )
00253 (*pData)->Set( MathUtil::ProbabilityFromTStat( MathUtil::TStatFromCorrelation( corr, actualSize-2 ), actualSize-2 ), idx );
00254 }
00255 }
00256 }
00257
00258 return correlationData;
00259 }
00260
00261 TypedArray::SmartPtr
00262 HypothesisTests::GetOneSampleTTest
00263 ( std::vector<TypedArray::SmartPtr>& dataX,
00264 TypedArray::SmartPtr* tstatData, TypedArray::SmartPtr* avgXData,
00265 const TypedArray* mask )
00266 {
00267 const unsigned int length = dataX[0]->GetDataSize();
00268
00269 TypedArray::SmartPtr probData = TypedArray::Create( TYPE_FLOAT, length );
00270
00271 if ( tstatData )
00272 *tstatData = TypedArray::Create( TYPE_FLOAT, length );
00273
00274 if ( avgXData )
00275 *avgXData = TypedArray::Create( TYPE_FLOAT, length );
00276
00277 const unsigned int dataXsize = dataX.size();
00278 std::vector<Types::DataItem> valuesX( dataXsize );
00279
00280 Types::DataItem t = 0, prob = 0, avgX = 0;
00281 for ( unsigned int idx = 0; idx < length; ++idx ) {
00282
00283 Types::DataItem maskValue;
00284 if ( !mask || (mask->Get( maskValue, idx ) && (maskValue != 0)) )
00285 {
00286 valuesX.resize( dataXsize );
00287 unsigned int actualSizeX = 0;
00288 for ( unsigned int i = 0; i < dataXsize; ++i )
00289 if ( dataX[i]->Get( valuesX[actualSizeX], idx ) )
00290 ++actualSizeX;
00291
00292 if ( actualSizeX )
00293 {
00294 valuesX.resize( actualSizeX );
00295 prob = MathUtil::TTest<Types::DataItem>( valuesX, t, avgX );
00296
00297 if ( (prob < 0) || (prob>1) )
00298 {
00299 fprintf( stderr, "t = %f\tp = %f\n", t, prob );
00300 }
00301 prob = 1.0 - prob;
00302 }
00303 else
00304 {
00305 t = prob = 0;
00306 }
00307
00308 if ( tstatData ) (*tstatData)->Set( t, idx );
00309 if ( avgXData ) (*avgXData)->Set( avgX, idx );
00310
00311 if ( avgX > 0 )
00312 probData->Set( -prob, idx );
00313 else
00314 probData->Set( +prob, idx );
00315 } else {
00316 probData->SetPaddingAt( idx );
00317 if ( tstatData ) (*tstatData)->SetPaddingAt( idx );
00318 if ( avgXData ) (*avgXData)->SetPaddingAt( idx );
00319 }
00320 }
00321
00322 return probData;
00323 }
00324
00325 TypedArray::SmartPtr
00326 HypothesisTests::GetHeritability
00327 ( std::vector<TypedArray::SmartPtr>& dataX,
00328 std::vector<TypedArray::SmartPtr>& dataY,
00329 const TypedArray* mask )
00330 {
00331 const size_t length = dataX[0]->GetDataSize();
00332
00333 TypedArray::SmartPtr outData = TypedArray::Create( TYPE_FLOAT, length );
00334
00335 const unsigned int dataXsize = dataX.size();
00336 std::vector<float> valuesX( dataXsize );
00337 const unsigned int dataYsize = dataY.size();
00338 std::vector<float> valuesY( dataYsize );
00339
00340 for ( size_t idx = 0; idx < length; ++idx ) {
00341
00342 Types::DataItem maskValue;
00343 if ( !mask || (mask->Get( maskValue, idx ) && (maskValue != 0)) ) {
00344 }
00345 }
00346
00347 return outData;
00348 }
00349
00350 TypedArray::SmartPtr
00351 HypothesisTests::GetZScores
00352 ( std::vector<TypedArray::SmartPtr>& dataX,
00353 std::vector<TypedArray::SmartPtr>& dataY,
00354 const TypedArray* mask )
00355 {
00356 const size_t length = dataX[0]->GetDataSize();
00357
00358 TypedArray::SmartPtr outData = TypedArray::Create( TYPE_FLOAT, length );
00359
00360 const unsigned int dataXsize = dataX.size();
00361 std::vector<Types::DataItem> valuesX( dataXsize );
00362 const unsigned int dataYsize = dataY.size();
00363 std::vector<Types::DataItem> valuesY( dataYsize );
00364
00365 Types::DataItem avgX, avgY, varX;
00366
00367 for ( size_t idx = 0; idx < length; ++idx ) {
00368
00369 Types::DataItem maskValue;
00370 if ( !mask || (mask->Get( maskValue, idx ) && (maskValue != 0)) )
00371 {
00372 valuesX.resize( dataXsize );
00373 unsigned int actualSizeX = 0;
00374 for ( unsigned int i = 0; i < dataXsize; ++i )
00375 if ( dataX[i]->Get( valuesX[actualSizeX], idx ) ) ++actualSizeX;
00376
00377 valuesY.resize( dataYsize );
00378 unsigned int actualSizeY = 0;
00379 for ( unsigned int i = 0; i < dataYsize; ++i )
00380 if ( dataY[i]->Get( valuesY[actualSizeY], idx ) ) ++actualSizeY;
00381
00382 if ( actualSizeX && actualSizeY )
00383 {
00384 valuesX.resize( actualSizeX );
00385 avgX = MathUtil::Mean<Types::DataItem>( valuesX );
00386 valuesY.resize( actualSizeY );
00387 avgY = MathUtil::Mean<Types::DataItem>( valuesY );
00388
00389 varX = MathUtil::Variance<Types::DataItem>( valuesX, avgX );
00390
00391 outData->Set( (avgY - avgX) / sqrt( varX ), idx );
00392 }
00393 else
00394 {
00395 outData->SetPaddingAt( idx );
00396 }
00397 }
00398 else
00399 {
00400 outData->SetPaddingAt( idx );
00401 }
00402 }
00403
00404 return outData;
00405 }
00406
00407 TypedArray::SmartPtr
00408 HypothesisTests::GetGeneticCovariance
00409 ( std::vector<TypedArray::SmartPtr>& dataMZ,
00410 std::vector<TypedArray::SmartPtr>& dataDZ,
00411 const TypedArray* mask )
00412 {
00413 const size_t length = dataMZ[0]->GetDataSize();
00414
00415 TypedArray::SmartPtr outData = TypedArray::Create( TYPE_FLOAT, length );
00416
00417 const unsigned int dataMZsize = dataMZ.size();
00418 std::vector<Types::DataItem> valuesMZ( dataMZsize );
00419 const unsigned int dataDZsize = dataDZ.size();
00420 std::vector<Types::DataItem> valuesDZ( dataDZsize );
00421
00422 Types::DataItem avgMZ, avgDZ, varMZ, varDZ;
00423
00424 for ( size_t idx = 0; idx < length; ++idx ) {
00425
00426 Types::DataItem maskValue;
00427 if ( !mask || (mask->Get( maskValue, idx ) && (maskValue != 0)) )
00428 {
00429 valuesMZ.resize( dataMZsize );
00430 unsigned int actualSizeMZ = 0;
00431 for ( unsigned int i = 0; i < dataMZsize; ++i )
00432 if ( dataMZ[i]->Get( valuesMZ[actualSizeMZ], idx ) ) ++actualSizeMZ;
00433
00434 valuesDZ.resize( dataDZsize );
00435 unsigned int actualSizeDZ = 0;
00436 for ( unsigned int i = 0; i < dataDZsize; ++i )
00437 if ( dataDZ[i]->Get( valuesDZ[actualSizeDZ], idx ) ) ++actualSizeDZ;
00438
00439 if ( actualSizeMZ && actualSizeDZ )
00440 {
00441 valuesMZ.resize( actualSizeMZ );
00442 avgMZ = MathUtil::Mean<Types::DataItem>( valuesMZ );
00443 varMZ = MathUtil::Variance<Types::DataItem>( valuesMZ, avgMZ );
00444
00445 valuesDZ.resize( actualSizeDZ );
00446 avgDZ = MathUtil::Mean<Types::DataItem>( valuesDZ );
00447 varDZ = MathUtil::Variance<Types::DataItem>( valuesDZ, avgDZ );
00448
00449 outData->Set( varMZ / avgMZ - varDZ / avgDZ, idx );
00450 }
00451 else
00452 {
00453 outData->SetPaddingAt( idx );
00454 }
00455 }
00456 else
00457 {
00458 outData->SetPaddingAt( idx );
00459 }
00460 }
00461
00462 return outData;
00463 }
00464
00465 }