00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033 #ifdef CMTK_BUILD_MPI
00034
00035 namespace
00036 cmtk
00037 {
00038
00041
00042 SplineWarpCongealingFunctional::ReturnType
00043 SplineWarpCongealingFunctional
00044 ::EvaluateWithGradient
00045 ( CoordinateVector& v, CoordinateVector& g, const Types::Coordinate step )
00046 {
00047 const Self::ReturnType baseValue = this->EvaluateAt( v );
00048
00049 const size_t numberOfXforms = this->m_XformVector.size();
00050 const size_t numberOfControlPoints = this->m_ParametersPerXform / 3;
00051
00052 const size_t controlPointsPerNode = 1 + static_cast<size_t>( numberOfControlPoints / (4 * this->m_SizeMPI) );
00053
00054
00055 const size_t controlPointsRootNode = 1 + controlPointsPerNode / 4;
00056
00057 MPI::Status status;
00058
00059 size_t resultsToReceive = 0;
00060 std::vector<size_t> taskByNode( this->m_SizeMPI, -1 );
00061
00062 const size_t msgBufferSize = 3 * sizeof( Types::Coordinate ) * controlPointsPerNode * numberOfXforms;
00063 std::vector<char> msgBuffer( msgBufferSize );
00064 char* msgBufferPtr = &msgBuffer[0];
00065
00066
00067 const size_t numberOfThreads = Threads::GetNumberOfThreads();
00068 if ( this->m_StaticThreadStorage.size() != numberOfThreads )
00069 {
00070 this->m_StaticThreadStorage.resize( numberOfThreads );
00071 for ( size_t thread = 0; thread < numberOfThreads; ++thread )
00072 {
00073 this->m_StaticThreadStorage[thread].Initialize( this );
00074 }
00075 }
00076 else
00077 {
00078 for ( size_t thread = 0; thread < numberOfThreads; ++thread )
00079 {
00080 this->m_StaticThreadStorage[thread].m_NeedToCopyXformParameters = true;
00081 }
00082 }
00083
00084
00085 ThreadParameterArray<Self,EvaluateLocalGradientThreadParameters> threadParams( this, numberOfThreads );
00086 threadParams.RunInParallelAsynchronous( EvaluateLocalGradientThreadFunc );
00087 for ( size_t thread = 0; thread < numberOfThreads; ++thread )
00088 {
00089 threadParams[thread].m_Step = step;
00090 threadParams[thread].m_Gradient = (Types::Coordinate*)msgBufferPtr;
00091 }
00092
00093 bool allJobsSent = false;
00094 size_t firstIndexToCompute = 0;
00095 while ( (firstIndexToCompute < numberOfControlPoints) && ! allJobsSent )
00096 {
00097 if ( this->m_RankMPI == 0 )
00098 {
00099
00100 for ( int dest = 1; (dest < this->m_SizeMPI) && !allJobsSent; ++dest )
00101 {
00102 if ( taskByNode[dest] == (size_t)-1 )
00103 {
00104 if ( firstIndexToCompute >= numberOfControlPoints )
00105 {
00106 allJobsSent = true;
00107 }
00108 else
00109 {
00110 taskByNode[dest] = firstIndexToCompute;
00111 MPI::COMM_WORLD.Ssend( &firstIndexToCompute, sizeof( firstIndexToCompute ), MPI::CHAR, dest, MESSAGE_TAG_COMPUTE );
00112 ++resultsToReceive;
00113
00114 firstIndexToCompute += controlPointsPerNode;
00115 }
00116 }
00117 }
00118 }
00119 else
00120 {
00121
00122 MPI::COMM_WORLD.Recv( msgBufferPtr, msgBufferSize, MPI::CHAR, 0 , MPI::ANY_TAG, status );
00123 if ( status.Get_tag() == MESSAGE_TAG_FINISHED )
00124 {
00125 allJobsSent = true;
00126 }
00127 else
00128 {
00129 int position = 0;
00130 MPI::CHAR.Unpack( msgBufferPtr, msgBufferSize, &firstIndexToCompute, sizeof( firstIndexToCompute ), position, MPI::COMM_WORLD );
00131 }
00132 }
00133
00134 if ( (firstIndexToCompute < numberOfControlPoints ) && ! allJobsSent )
00135 {
00136 const size_t cpsToCompute = std::min( this->m_RankMPI ? controlPointsPerNode : controlPointsRootNode, (int)numberOfControlPoints - firstIndexToCompute );
00137
00138 this->m_ControlPointIndexNext = firstIndexToCompute;
00139 this->m_ControlPointIndexLast = firstIndexToCompute + cpsToCompute;
00140
00141 for ( size_t thread = 0; thread < numberOfThreads; ++thread )
00142 {
00143 threadParams[thread].m_FirstIndexToCompute = firstIndexToCompute;
00144
00145 this->m_ThreadWorkSemaphore.Post();
00146 }
00147
00148
00149 for ( size_t nThreads = 0; nThreads < numberOfThreads; ++nThreads )
00150 {
00151 this->m_ThreadReadySemaphore.Wait();
00152 }
00153
00154
00155 if ( this->m_RankMPI == 0 )
00156 {
00157
00158 this->ReorderGradientComponents( g.Elements, (Types::Coordinate*)msgBufferPtr, firstIndexToCompute, this->m_ControlPointIndexLast );
00159 firstIndexToCompute += cpsToCompute;
00160 }
00161 else
00162 {
00163
00164 MPI::COMM_WORLD.Ssend( msgBufferPtr, 3 * numberOfXforms * cpsToCompute * sizeof( Types::Coordinate ), MPI::CHAR, 0, MESSAGE_TAG_RESULTS );
00165 }
00166 }
00167
00168
00169
00170 if ( this->m_RankMPI == 0 )
00171 {
00172
00173
00174 while ( resultsToReceive && MPI::COMM_WORLD.Iprobe( MPI::ANY_SOURCE, MESSAGE_TAG_RESULTS, status ) )
00175 {
00176
00177 MPI::COMM_WORLD.Recv( msgBufferPtr, msgBufferSize, MPI::CHAR, status.Get_source(), MESSAGE_TAG_RESULTS, status );
00178
00179
00180 const int fromNode = status.Get_source();
00181 const int receivedJob = taskByNode[fromNode];
00182
00183 --resultsToReceive;
00184 taskByNode[fromNode] = -1;
00185
00186
00187 const size_t maxCpIndex = std::min<size_t>( receivedJob + controlPointsPerNode, numberOfControlPoints );
00188 this->ReorderGradientComponents( g.Elements, (Types::Coordinate*)msgBufferPtr, receivedJob, maxCpIndex );
00189 }
00190 }
00191 }
00192
00193
00194 while ( resultsToReceive )
00195 {
00196
00197 MPI::COMM_WORLD.Recv( msgBufferPtr, msgBufferSize, MPI::CHAR, MPI::ANY_SOURCE, MPI::ANY_TAG, status );
00198
00199
00200 const int fromNode = status.Get_source();
00201 const int receivedJob = taskByNode[fromNode];
00202
00203 --resultsToReceive;
00204 taskByNode[fromNode] = -1;
00205
00206
00207 const size_t maxCpIndex = std::min<size_t>( receivedJob + controlPointsPerNode, numberOfControlPoints );
00208 this->ReorderGradientComponents( g.Elements, (Types::Coordinate*)msgBufferPtr, receivedJob, maxCpIndex );
00209 }
00210
00211 if ( this->m_RankMPI == 0 )
00212 {
00213
00214 for ( int workerNode = 1; workerNode < this->m_SizeMPI; ++workerNode )
00215 {
00216 MPI::COMM_WORLD.Ssend( msgBufferPtr, 1, MPI::CHAR, workerNode, MESSAGE_TAG_FINISHED );
00217 }
00218 }
00219
00220
00221 MPI::COMM_WORLD.Bcast( g.Elements, g.Dim * sizeof( g.Elements[0] ), MPI::CHAR, 0 );
00222
00223 if ( this->m_PartialGradientMode )
00224 {
00225 const Types::Coordinate gthresh = g.MaxNorm() * this->m_PartialGradientThreshold;
00226 for ( size_t param = 0; param < g.Dim; ++param )
00227 {
00228 if ( fabs( g[param] ) < gthresh )
00229 {
00230 g[param] = this->m_ParamStepArray[param] = 0.0;
00231 }
00232 }
00233 }
00234
00235 if ( this->m_ForceZeroSum )
00236 {
00237 this->ForceZeroSumGradient( g );
00238 }
00239
00240 return baseValue;
00241 }
00242
00243 void
00244 SplineWarpCongealingFunctional
00245 ::ReorderGradientComponents
00246 ( Types::Coordinate *const dst, const Types::Coordinate* src, const size_t fromCpIdx, const size_t toCpIdx )
00247 {
00248 const size_t numberOfParameters = this->ParamVectorDim();
00249
00250
00251 size_t currentParameter = 0;
00252 for ( size_t cpIndex = fromCpIdx; cpIndex < toCpIdx; ++cpIndex )
00253 {
00254 for ( size_t cparam = 3*cpIndex; cparam < numberOfParameters; cparam += this->m_ParametersPerXform )
00255 {
00256 for ( size_t dim = 0; dim < 3; ++dim, ++currentParameter )
00257 {
00258 dst[cparam+dim] = src[currentParameter];
00259 }
00260 }
00261 }
00262 }
00263
00264 CMTK_THREAD_RETURN_TYPE
00265 SplineWarpCongealingFunctional
00266 ::EvaluateLocalGradientThreadFunc
00267 ( void* args )
00268 {
00269 EvaluateLocalGradientThreadParameters* threadParameters = static_cast<EvaluateLocalGradientThreadParameters*>( args );
00270
00271 Self* This = threadParameters->thisObject;
00272 const Self* ThisConst = This;
00273 const size_t threadID = threadParameters->ThisThreadIndex;
00274
00275 const size_t numberOfXforms = ThisConst->m_XformVector.size();
00276 const size_t parametersPerXform = ThisConst->m_ParametersPerXform;
00277 const size_t paramVectorDim = ThisConst->ParamVectorDim();
00278
00279 const byte paddingValue = ThisConst->m_PaddingValue;
00280 const size_t imagesFrom = ThisConst->m_ActiveImagesFrom;
00281 const size_t imagesTo = ThisConst->m_ActiveImagesTo;
00282 const size_t numberOfImages = imagesTo - imagesFrom;
00283 const size_t numberOfImagesIncludingTemplate = ThisConst->m_UseTemplateData ? numberOfImages+1 : numberOfImages;
00284
00285 Self::StaticThreadStorage* threadStorage = &(This->m_StaticThreadStorage[threadID]);
00286 if ( threadStorage->m_NeedToCopyXformParameters )
00287 {
00288 for ( size_t xi = 0; xi < numberOfXforms; ++xi )
00289 {
00290 threadStorage->m_Xforms[xi]->CopyParamVector( This->GetXformByIndex(xi) );
00291 threadStorage->m_NeedToCopyXformParameters = false;
00292 }
00293 }
00294
00295 const UniformVolume* templateGrid = ThisConst->m_TemplateGrid;
00296 const size_t numberOfControlPoints = ThisConst->m_ParametersPerXform / 3;
00297
00298 size_t cpIndex;
00299
00300 while ( true )
00301 {
00302 This->m_ThreadWorkSemaphore.Wait();
00303
00304 This->m_ControlPointIndexLock.Lock();
00305 while ( (cpIndex = This->m_ControlPointIndexNext) < This->m_ControlPointIndexLast )
00306 {
00307 ++This->m_ControlPointIndexNext;
00308 This->m_ControlPointIndexLock.Unlock();
00309
00310 if ( !(cpIndex % 1000) )
00311 {
00312 std::cerr << cpIndex << " / " << numberOfControlPoints << "\r";
00313 }
00314
00315 std::vector<DataGrid::RegionType>::const_iterator voi = ThisConst->m_VolumeOfInfluenceArray.begin() + cpIndex;
00316 const size_t pixelsPerLineVOI = (voi->To()[0]-voi->From()[0]);
00317
00318 std::fill( threadStorage->m_FPlus.begin(), threadStorage->m_FPlus.end(), 0 );
00319 std::fill( threadStorage->m_FMinus.begin(), threadStorage->m_FMinus.end(), 0 );
00320 std::fill( threadStorage->m_CountByParameterPlus.begin(), threadStorage->m_CountByParameterPlus.end(), 0 );
00321 std::fill( threadStorage->m_CountByParameterMinus.begin(), threadStorage->m_CountByParameterMinus.end(), 0 );
00322
00323 for ( int z = voi->From()[2]; (z < voi->To()[2]); ++z )
00324 {
00325 for ( int y = voi->From()[1]; (y < voi->To()[1]); ++y )
00326 {
00327
00328 const size_t rowofs = templateGrid->GetOffsetFromIndex( voi->From()[0], y, z );
00329
00330 size_t ofs = rowofs;
00331 for ( size_t x = 0; x < pixelsPerLineVOI; ++x, ++ofs )
00332 {
00333 threadStorage->m_Histogram[x].Reset();
00334 threadStorage->m_Count[x] = 0;
00335
00336 const size_t kernelIdx = ThisConst->m_StandardDeviationByPixel[ofs];
00337 const size_t kernelRadius = ThisConst->m_HistogramKernelRadius[kernelIdx];
00338 const HistogramBinType* kernel = ThisConst->m_HistogramKernel[kernelIdx];
00339
00340 if ( ThisConst->m_UseTemplateData )
00341 {
00342 const byte templateValue = ThisConst->m_TemplateData[ofs];
00343 if ( templateValue != paddingValue )
00344 {
00345 threadStorage->m_Histogram[x].AddWeightedSymmetricKernel( templateValue, kernelRadius, kernel );
00346 ++threadStorage->m_Count[x];
00347 }
00348 }
00349
00350 for ( size_t img = imagesFrom; img < imagesTo; ++img )
00351 {
00352 const byte dataThisPixel = ThisConst->m_Data[img][ofs];
00353 if ( dataThisPixel != paddingValue )
00354 {
00355 threadStorage->m_Histogram[x].AddWeightedSymmetricKernel( dataThisPixel, kernelRadius, kernel );
00356 ++threadStorage->m_Count[x];
00357 }
00358 }
00359 }
00360
00361 size_t cparam = 3 * cpIndex;
00362 size_t currentParameter = 0;
00363 for ( size_t imageIdx = 0; imageIdx < numberOfXforms; ++imageIdx, cparam += parametersPerXform )
00364 {
00365 SplineWarpXform::SmartPtr xform = threadStorage->m_Xforms[imageIdx];
00366 const UniformVolume* target = ThisConst->m_ImageVector[imageIdx];
00367 const byte* dataPtr = static_cast<const byte*>( target->GetData()->GetDataPtr() );
00368
00369 for ( size_t dim = 0; dim < 3; ++dim, ++currentParameter )
00370 {
00371 const size_t cdparam = cparam + dim;
00372 const size_t xfparam = 3 * cpIndex + dim;
00373 const Types::Coordinate pStep = ThisConst->m_ParamStepArray[cdparam] * threadParameters->m_Step;
00374
00375 if ( pStep > 0 )
00376 {
00377 const Types::Coordinate v0 = xform->GetParameter( xfparam );
00378 for ( int delta = 0; delta < 2; ++delta )
00379 {
00380 Types::Coordinate vTest = v0 + (2*delta-1) * pStep;
00381 xform->SetParameter( xfparam, vTest );
00382
00383 xform->GetTransformedGridRow( pixelsPerLineVOI, &(threadStorage->m_VectorList[0]), voi->From()[0], y, z );
00384
00385 size_t ofs = rowofs;
00386 for ( size_t x = 0; x < pixelsPerLineVOI; ++x, ++ofs )
00387 {
00388 if ( threadStorage->m_Count[x] == numberOfImages )
00389 {
00390 const byte baselineData = ThisConst->m_Data[imageIdx][ofs];
00391 HistogramType& workingHistogram = threadStorage->m_Histogram[x];
00392
00393 const size_t kernelIdx = ThisConst->m_StandardDeviationByPixel[ofs];
00394 const size_t kernelRadius = ThisConst->m_HistogramKernelRadius[kernelIdx];
00395 const HistogramBinType* kernel = ThisConst->m_HistogramKernel[kernelIdx];
00396
00397 byte value;
00398 if ( (baselineData != paddingValue) && target->ProbeData( value, dataPtr, threadStorage->m_VectorList[x] ) )
00399 {
00400 if ( value != paddingValue )
00401 {
00402 if ( delta )
00403 ++threadStorage->m_CountByParameterPlus[currentParameter];
00404 else
00405 ++threadStorage->m_CountByParameterMinus[currentParameter];
00406
00407 if ( value != baselineData )
00408 {
00409 float fd = 0;
00410 float invsamples = 1.0 / workingHistogram.SampleCount();
00411
00412 const size_t idxMin = std::min( value, baselineData ) - kernelRadius;
00413 const size_t idxMax = std::max( value, baselineData ) + kernelRadius + 1;
00414
00415 for ( size_t idx = idxMin; idx < idxMax; ++idx )
00416 {
00417 const size_t idxV = abs(idx - value);
00418 const float kernelIn = ( idxV < kernelRadius ) ? kernel[idxV] : 0;
00419
00420 const size_t idxB = abs(idx - baselineData);
00421 const float kernelOut = ( idxB < kernelRadius ) ? kernel[idxB] : 0;
00422
00423 if ( (kernelIn > 0) || (kernelOut > 0) )
00424 {
00425 fd += MathUtil::plogp( invsamples * (workingHistogram[idx] - kernelOut + kernelIn) ) - MathUtil::plogp( invsamples * workingHistogram[idx] );
00426 }
00427 }
00428
00429 if ( delta )
00430 threadStorage->m_FPlus[currentParameter] += fd;
00431 else
00432 threadStorage->m_FMinus[currentParameter] += fd;
00433 }
00434 }
00435 }
00436 }
00437 }
00438 }
00439 xform->SetParameter( xfparam, v0 );
00440 }
00441 }
00442 }
00443 }
00444 }
00445
00446
00447 size_t currentParameterIdxLinear = 3 * numberOfXforms * (cpIndex - threadParameters->m_FirstIndexToCompute );
00448 size_t imageDimIdx = 0;
00449 for ( size_t cparam = 3*cpIndex; cparam < paramVectorDim; cparam += parametersPerXform )
00450 {
00451 for ( size_t dim = 0; dim < 3; ++dim, ++imageDimIdx, ++currentParameterIdxLinear )
00452 {
00453 threadParameters->m_Gradient[currentParameterIdxLinear] = 0.0;
00454 assert( imageDimIdx < 3 * numberOfXforms );
00455 if ( threadStorage->m_CountByParameterPlus[imageDimIdx] && threadStorage->m_CountByParameterMinus[imageDimIdx] )
00456 {
00457 double upper = threadStorage->m_FPlus[imageDimIdx] / threadStorage->m_CountByParameterPlus[imageDimIdx];
00458 double lower = threadStorage->m_FMinus[imageDimIdx] / threadStorage->m_CountByParameterMinus[imageDimIdx];
00459
00460 if ( (ThisConst->m_JacobianConstraintWeight > 0) || (ThisConst->m_BendingEnergyWeight > 0) )
00461 {
00462 const size_t cdparam = cparam+dim;
00463 const size_t warpParam = cdparam % parametersPerXform;
00464 const size_t imageIdx = cdparam / parametersPerXform;
00465 const SplineWarpXform* xform = ThisConst->GetXformByIndex(imageIdx);
00466 const Types::Coordinate pStep = ThisConst->m_ParamStepArray[cdparam] * threadParameters->m_Step;
00467
00468 if ( ThisConst->m_JacobianConstraintWeight > 0 )
00469 {
00470 double upperJ, lowerJ;
00471 xform->GetJacobianConstraintDerivative( upperJ, lowerJ, warpParam, voi[warpParam], pStep );
00472
00473 upper -= ThisConst->m_JacobianConstraintWeight * upperJ;
00474 lower -= ThisConst->m_JacobianConstraintWeight * lowerJ;
00475 }
00476
00477 if ( ThisConst->m_BendingEnergyWeight > 0 )
00478 {
00479 double upperE, lowerE;
00480 xform->GetGridEnergyDerivative( upperE, lowerE, warpParam, pStep );
00481 upper -= ThisConst->m_BendingEnergyWeight * upperE;
00482 lower -= ThisConst->m_BendingEnergyWeight * lowerE;
00483 }
00484 }
00485
00486 if ( (upper > 0) || (lower > 0))
00487 {
00488 threadParameters->m_Gradient[currentParameterIdxLinear] = upper - lower;
00489 }
00490 }
00491 }
00492 }
00493
00494 This->m_ControlPointIndexLock.Lock();
00495 }
00496 This->m_ControlPointIndexLock.Unlock();
00497
00498 This->m_ThreadReadySemaphore.Post();
00499 }
00500
00501 return CMTK_THREAD_RETURN_VALUE;
00502 }
00503
00504 }
00505
00506 #endif // #ifdef CMTK_BUILD_MPI