cmtkHypothesisTests.cxx

Go to the documentation of this file.
00001 /*
00002 //
00003 //  Copyright 1997-2009 Torsten Rohlfing
00004 //
00005 //  Copyright 2004-2010 SRI International
00006 //
00007 //  This file is part of the Computational Morphometry Toolkit.
00008 //
00009 //  http://www.nitrc.org/projects/cmtk/
00010 //
00011 //  The Computational Morphometry Toolkit is free software: you can
00012 //  redistribute it and/or modify it under the terms of the GNU General Public
00013 //  License as published by the Free Software Foundation, either version 3 of
00014 //  the License, or (at your option) any later version.
00015 //
00016 //  The Computational Morphometry Toolkit is distributed in the hope that it
00017 //  will be useful, but WITHOUT ANY WARRANTY; without even the implied
00018 //  warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00019 //  GNU General Public License for more details.
00020 //
00021 //  You should have received a copy of the GNU General Public License along
00022 //  with the Computational Morphometry Toolkit.  If not, see
00023 //  <http://www.gnu.org/licenses/>.
00024 //
00025 //  $Revision: 2398 $
00026 //
00027 //  $LastChangedDate: 2010-10-05 14:54:37 -0700 (Tue, 05 Oct 2010) $
00028 //
00029 //  $LastChangedBy: torstenrohlfing $
00030 //
00031 */
00032 
00033 #include "cmtkHypothesisTests.h"
00034 
00035 #include <Base/cmtkVector3D.h>
00036 #include <Base/cmtkMathUtil.h>
00037 #include <System/cmtkConsole.h>
00038 
00039 #include <vector>
00040 
00041 namespace
00042 cmtk
00043 {
00044 
00047 
00048 TypedArray::SmartPtr
00049 HypothesisTests::GetUnpairedTwoTailedTTest
00050 ( std::vector<TypedArray::SmartPtr>& dataX, 
00051   std::vector<TypedArray::SmartPtr>& dataY,
00052   TypedArray::SmartPtr* tstatData, TypedArray::SmartPtr* avgXData, 
00053   TypedArray::SmartPtr* avgYData, const TypedArray* mask )
00054 {
00055   const unsigned int length = dataX[0]->GetDataSize();
00056 
00057   TypedArray::SmartPtr probData = TypedArray::Create( TYPE_FLOAT, length );
00058 
00059   if ( tstatData )
00060     *tstatData = TypedArray::Create( TYPE_FLOAT, length );
00061   
00062   if ( avgXData )
00063     *avgXData = TypedArray::Create( TYPE_FLOAT, length );
00064   
00065   if ( avgYData )
00066     *avgYData = TypedArray::Create( TYPE_FLOAT, length );
00067   
00068   const unsigned int dataXsize = dataX.size();
00069   std::vector<Types::DataItem> valuesX( dataXsize );
00070   const unsigned int dataYsize = dataY.size();
00071   std::vector<Types::DataItem> valuesY( dataYsize );
00072 
00073   Types::DataItem t = 0, prob = 0, avgX = 0, avgY = 0;
00074   for ( unsigned int idx = 0; idx < length; ++idx ) {
00075 
00076     Types::DataItem maskValue;
00077     if ( !mask || (mask->Get( maskValue, idx ) && (maskValue != 0)) ) 
00078       {
00079       unsigned int actualSizeX = 0;
00080       for ( unsigned int i = 0; i < dataXsize; ++i )
00081         if ( dataX[i]->Get( valuesX[actualSizeX], idx ) ) ++actualSizeX;
00082      
00083       unsigned int actualSizeY = 0;
00084       for ( unsigned int i = 0; i < dataYsize; ++i )
00085         if ( dataY[i]->Get( valuesY[actualSizeY], idx ) ) ++actualSizeY;
00086       
00087       if ( actualSizeX && actualSizeY )
00088         {
00089         prob = MathUtil::TTest<Types::DataItem>( valuesX, valuesY, t, avgX, avgY );
00090         
00091         if ( (prob < 0) || (prob>1) )
00092           {
00093           fprintf( stderr, "t = %f\tp = %f\n", t, prob );
00094           }
00095         prob = 1.0 - prob; // convert probability to significance
00096         }
00097       else
00098         {
00099         t = prob = 0;
00100         }
00101      
00102       if ( tstatData ) (*tstatData)->Set( t, idx );
00103       if ( avgXData ) (*avgXData)->Set( avgX, idx );
00104       if ( avgYData ) (*avgYData)->Set( avgY, idx );
00105       
00106       if ( avgX > avgY )
00107         probData->Set(  prob, idx );
00108       else
00109         probData->Set( -prob, idx );
00110       } 
00111     else 
00112       {
00113       probData->SetPaddingAt( idx );
00114       if ( tstatData ) (*tstatData)->SetPaddingAt( idx );
00115       if ( avgXData ) (*avgXData)->SetPaddingAt( idx );
00116       if ( avgYData ) (*avgYData)->SetPaddingAt( idx );
00117       }
00118   }
00119   
00120   return probData;
00121 }
00122 
00123 TypedArray::SmartPtr
00124 HypothesisTests::GetPairedTwoTailedTTest
00125 ( std::vector<TypedArray::SmartPtr>& dataX, 
00126   std::vector<TypedArray::SmartPtr>& dataY,
00127   TypedArray::SmartPtr* tstatData, TypedArray::SmartPtr* avgXData, 
00128   TypedArray::SmartPtr* avgYData, const TypedArray* mask )
00129 {
00130   if ( dataX.size() != dataY.size() )
00131     {
00132     throw( Exception( "Cannot perform paired t-test if numbers of X and Y samples isn't equal" ) );
00133     }
00134       
00135   const unsigned int length = dataX[0]->GetDataSize();
00136 
00137   TypedArray::SmartPtr probData = TypedArray::Create( TYPE_FLOAT, length );
00138 
00139   if ( tstatData )
00140     *tstatData = TypedArray::Create( TYPE_FLOAT, length );
00141   
00142   if ( avgXData )
00143     *avgXData = TypedArray::Create( TYPE_FLOAT, length );
00144   
00145   if ( avgYData )
00146     *avgYData = TypedArray::Create( TYPE_FLOAT, length );
00147   
00148   const unsigned int dataXsize = dataX.size();
00149   std::vector<Types::DataItem> valuesX( dataXsize );
00150   const unsigned int dataYsize = dataY.size();
00151   std::vector<Types::DataItem> valuesY( dataYsize );
00152 
00153   Types::DataItem t = 0, prob = 0, avgX = 0, avgY = 0;
00154   for ( unsigned int idx = 0; idx < length; ++idx ) {
00155 
00156     Types::DataItem maskValue;
00157     if ( !mask || (mask->Get( maskValue, idx ) && (maskValue != 0)) ) 
00158       {
00159       valuesX.resize( dataXsize );
00160       unsigned int actualSizeX = 0;
00161       for ( unsigned int i = 0; i < dataXsize; ++i )
00162         if ( dataX[i]->Get( valuesX[actualSizeX], idx ) ) ++actualSizeX;
00163       
00164       valuesY.resize( dataYsize );
00165       unsigned int actualSizeY = 0;
00166       for ( unsigned int i = 0; i < dataYsize; ++i )
00167         if ( dataY[i]->Get( valuesY[actualSizeY], idx ) ) ++actualSizeY;
00168       
00169       if ( actualSizeX == actualSizeY )
00170         {
00171         valuesX.resize( actualSizeX );
00172         valuesY.resize( actualSizeY );
00173 
00174         prob = MathUtil::PairedTTest<Types::DataItem>( valuesX, valuesY, t, avgX, avgY );
00175         
00176         if ( (prob < 0) || (prob>1) )
00177           {
00178           fprintf( stderr, "t = %f\tp = %f\n", t, prob );
00179           }
00180         prob = 1.0 - prob; // convert probability to significance
00181         }
00182       else
00183         {
00184         t = prob = 0;
00185         }
00186       
00187       if ( tstatData ) (*tstatData)->Set( t, idx );
00188       if ( avgXData ) (*avgXData)->Set( avgX, idx );
00189       if ( avgYData ) (*avgYData)->Set( avgY, idx );
00190       
00191       if ( avgX > avgY )
00192         probData->Set(  prob, idx );
00193       else
00194         probData->Set( -prob, idx );
00195       } 
00196     else 
00197       {
00198       probData->SetPaddingAt( idx );
00199       if ( tstatData ) (*tstatData)->SetPaddingAt( idx );
00200       if ( avgXData ) (*avgXData)->SetPaddingAt( idx );
00201       if ( avgYData ) (*avgYData)->SetPaddingAt( idx );
00202       }
00203   }
00204   
00205   return probData;
00206 }
00207 
00208 TypedArray::SmartPtr 
00209 HypothesisTests::GetPairedCorrelation
00210 ( std::vector<TypedArray::SmartPtr>& dataX, std::vector<TypedArray::SmartPtr>& dataY, TypedArray::SmartPtr* pData, const TypedArray* mask )
00211 {
00212   if ( dataX.size() != dataY.size() )
00213     {
00214     throw( Exception( "Cannot perform paired correlation if numbers of X and Y samples isn't equal" ) );
00215     }
00216   
00217   const unsigned int length = dataX[0]->GetDataSize();
00218 
00219   TypedArray::SmartPtr correlationData = TypedArray::Create( TYPE_FLOAT, length );
00220   if ( pData )
00221     *pData = TypedArray::Create( TYPE_FLOAT, length );
00222 
00223   const unsigned int dataXsize = dataX.size();
00224   std::vector<Types::DataItem> valuesX( dataXsize );
00225   const unsigned int dataYsize = dataY.size();
00226   std::vector<Types::DataItem> valuesY( dataYsize );
00227 
00228   for ( unsigned int idx = 0; idx < length; ++idx ) 
00229     {
00230     correlationData->SetPaddingAt( idx );
00231     if ( pData )
00232       (*pData)->SetPaddingAt( idx );
00233 
00234     Types::DataItem maskValue;
00235     if ( !mask || (mask->Get( maskValue, idx ) && (maskValue != 0)) ) 
00236       {
00237       valuesX.resize( dataXsize );
00238       valuesY.resize( dataXsize );
00239 
00240       unsigned int actualSize = 0;
00241       for ( unsigned int i = 0; i < dataXsize; ++i )
00242         if ( dataX[i]->Get( valuesX[actualSize], idx ) && dataY[i]->Get( valuesY[actualSize], idx ) )
00243           ++actualSize;
00244       
00245       if ( actualSize )
00246         {
00247         valuesX.resize( actualSize );
00248         valuesY.resize( actualSize );
00249 
00250         Types::DataItem corr = MathUtil::Correlation<Types::DataItem>( valuesX, valuesY );
00251         correlationData->Set(  corr, idx );
00252         if ( pData ) 
00253           (*pData)->Set( MathUtil::ProbabilityFromTStat( MathUtil::TStatFromCorrelation( corr, actualSize-2 ), actualSize-2 ), idx );
00254         }
00255       }
00256     }
00257   
00258   return correlationData;
00259 }
00260 
00261 TypedArray::SmartPtr
00262 HypothesisTests::GetOneSampleTTest
00263 ( std::vector<TypedArray::SmartPtr>& dataX, 
00264   TypedArray::SmartPtr* tstatData, TypedArray::SmartPtr* avgXData, 
00265   const TypedArray* mask )
00266 {
00267   const unsigned int length = dataX[0]->GetDataSize();
00268 
00269   TypedArray::SmartPtr probData = TypedArray::Create( TYPE_FLOAT, length );
00270 
00271   if ( tstatData )
00272     *tstatData = TypedArray::Create( TYPE_FLOAT, length );
00273   
00274   if ( avgXData )
00275     *avgXData = TypedArray::Create( TYPE_FLOAT, length );
00276 
00277   const unsigned int dataXsize = dataX.size();
00278   std::vector<Types::DataItem> valuesX( dataXsize );
00279 
00280   Types::DataItem t = 0, prob = 0, avgX = 0;
00281   for ( unsigned int idx = 0; idx < length; ++idx ) {
00282 
00283     Types::DataItem maskValue;
00284     if ( !mask || (mask->Get( maskValue, idx ) && (maskValue != 0)) ) 
00285       {
00286       valuesX.resize( dataXsize );
00287       unsigned int actualSizeX = 0;
00288       for ( unsigned int i = 0; i < dataXsize; ++i )
00289         if ( dataX[i]->Get( valuesX[actualSizeX], idx ) ) 
00290           ++actualSizeX;
00291       
00292       if ( actualSizeX )
00293         {
00294         valuesX.resize( actualSizeX );
00295         prob = MathUtil::TTest<Types::DataItem>( valuesX, t, avgX );
00296 
00297         if ( (prob < 0) || (prob>1) )
00298           {
00299           fprintf( stderr, "t = %f\tp = %f\n", t, prob );
00300           }
00301         prob = 1.0 - prob; // convert probability to significance
00302         }
00303       else
00304         {
00305         t = prob = 0;
00306         }
00307       
00308       if ( tstatData ) (*tstatData)->Set( t, idx );
00309       if ( avgXData ) (*avgXData)->Set( avgX, idx );
00310       
00311       if ( avgX > 0 )
00312         probData->Set(  -prob, idx );
00313       else
00314         probData->Set( +prob, idx );
00315     } else {
00316       probData->SetPaddingAt( idx );
00317       if ( tstatData ) (*tstatData)->SetPaddingAt( idx );
00318       if ( avgXData ) (*avgXData)->SetPaddingAt( idx );
00319     }
00320   }
00321   
00322   return probData;
00323 }
00324 
00325 TypedArray::SmartPtr
00326 HypothesisTests::GetHeritability
00327 ( std::vector<TypedArray::SmartPtr>& dataX, 
00328   std::vector<TypedArray::SmartPtr>& dataY,
00329   const TypedArray* mask )
00330 {
00331   const size_t length = dataX[0]->GetDataSize();
00332 
00333   TypedArray::SmartPtr outData = TypedArray::Create( TYPE_FLOAT, length );
00334   
00335   const unsigned int dataXsize = dataX.size();
00336   std::vector<float> valuesX( dataXsize );
00337   const unsigned int dataYsize = dataY.size();
00338   std::vector<float> valuesY( dataYsize );
00339 
00340   for ( size_t idx = 0; idx < length; ++idx ) {
00341 
00342     Types::DataItem maskValue;
00343     if ( !mask || (mask->Get( maskValue, idx ) && (maskValue != 0)) ) {
00344     }
00345   }
00346   
00347   return outData;
00348 }
00349 
00350 TypedArray::SmartPtr
00351 HypothesisTests::GetZScores
00352 ( std::vector<TypedArray::SmartPtr>& dataX,
00353   std::vector<TypedArray::SmartPtr>& dataY,
00354   const TypedArray* mask )
00355 {
00356   const size_t length = dataX[0]->GetDataSize();
00357 
00358   TypedArray::SmartPtr outData = TypedArray::Create( TYPE_FLOAT, length );
00359   
00360   const unsigned int dataXsize = dataX.size();
00361   std::vector<Types::DataItem> valuesX( dataXsize );
00362   const unsigned int dataYsize = dataY.size();
00363   std::vector<Types::DataItem> valuesY( dataYsize );
00364 
00365   Types::DataItem avgX, avgY, varX;
00366 
00367   for ( size_t idx = 0; idx < length; ++idx ) {
00368 
00369     Types::DataItem maskValue;
00370     if ( !mask || (mask->Get( maskValue, idx ) && (maskValue != 0)) ) 
00371       {
00372       valuesX.resize( dataXsize );
00373       unsigned int actualSizeX = 0;
00374       for ( unsigned int i = 0; i < dataXsize; ++i )
00375         if ( dataX[i]->Get( valuesX[actualSizeX], idx ) ) ++actualSizeX;
00376       
00377       valuesY.resize( dataYsize );
00378       unsigned int actualSizeY = 0;
00379       for ( unsigned int i = 0; i < dataYsize; ++i )
00380         if ( dataY[i]->Get( valuesY[actualSizeY], idx ) ) ++actualSizeY;
00381 
00382       if ( actualSizeX && actualSizeY )
00383         {
00384         valuesX.resize( actualSizeX );
00385         avgX = MathUtil::Mean<Types::DataItem>( valuesX );
00386         valuesY.resize( actualSizeY );
00387         avgY = MathUtil::Mean<Types::DataItem>( valuesY );
00388         
00389         varX = MathUtil::Variance<Types::DataItem>( valuesX, avgX );
00390 
00391         outData->Set( (avgY - avgX) / sqrt( varX ), idx );
00392         }
00393       else
00394         {
00395         outData->SetPaddingAt( idx );
00396         }
00397       }
00398     else
00399       {
00400       outData->SetPaddingAt( idx );
00401       }
00402   }
00403   
00404   return outData;
00405 }
00406 
00407 TypedArray::SmartPtr
00408 HypothesisTests::GetGeneticCovariance
00409 ( std::vector<TypedArray::SmartPtr>& dataMZ, 
00410   std::vector<TypedArray::SmartPtr>& dataDZ,
00411   const TypedArray* mask )
00412 {
00413   const size_t length = dataMZ[0]->GetDataSize();
00414 
00415   TypedArray::SmartPtr outData = TypedArray::Create( TYPE_FLOAT, length );
00416   
00417   const unsigned int dataMZsize = dataMZ.size();
00418   std::vector<Types::DataItem> valuesMZ( dataMZsize );
00419   const unsigned int dataDZsize = dataDZ.size();
00420   std::vector<Types::DataItem> valuesDZ( dataDZsize );
00421 
00422   Types::DataItem avgMZ, avgDZ, varMZ, varDZ;
00423 
00424   for ( size_t idx = 0; idx < length; ++idx ) {
00425 
00426     Types::DataItem maskValue;
00427     if ( !mask || (mask->Get( maskValue, idx ) && (maskValue != 0)) ) 
00428       {
00429       valuesMZ.resize( dataMZsize );
00430       unsigned int actualSizeMZ = 0;
00431       for ( unsigned int i = 0; i < dataMZsize; ++i )
00432         if ( dataMZ[i]->Get( valuesMZ[actualSizeMZ], idx ) ) ++actualSizeMZ;
00433       
00434       valuesDZ.resize( dataDZsize );
00435       unsigned int actualSizeDZ = 0;
00436       for ( unsigned int i = 0; i < dataDZsize; ++i )
00437         if ( dataDZ[i]->Get( valuesDZ[actualSizeDZ], idx ) ) ++actualSizeDZ;
00438 
00439       if ( actualSizeMZ && actualSizeDZ )
00440         {
00441         valuesMZ.resize( actualSizeMZ );
00442         avgMZ = MathUtil::Mean<Types::DataItem>( valuesMZ );
00443         varMZ = MathUtil::Variance<Types::DataItem>( valuesMZ, avgMZ );
00444 
00445         valuesDZ.resize( actualSizeDZ );
00446         avgDZ = MathUtil::Mean<Types::DataItem>( valuesDZ );
00447         varDZ = MathUtil::Variance<Types::DataItem>( valuesDZ, avgDZ );
00448 
00449         outData->Set( varMZ / avgMZ - varDZ / avgDZ, idx );
00450         }
00451       else
00452         {
00453         outData->SetPaddingAt( idx );
00454         }
00455       }
00456     else
00457       {
00458       outData->SetPaddingAt( idx );
00459       }
00460   }
00461   
00462   return outData;
00463 }
00464 
00465 } // namespace cmtk
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Defines