12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445144614471448144914501451145214531454145514561457145814591460146114621463146414651466146714681469147014711472147314741475147614771478147914801481148214831484148514861487148814891490149114921493149414951496149714981499150015011502150315041505150615071508150915101511151215131514151515161517151815191520152115221523152415251526152715281529153015311532153315341535153615371538153915401541154215431544154515461547154815491550155115521553155415551556155715581559156015611562156315641565156615671568156915701571157215731574157515761577157815791580158115821583158415851586158715881589159015911592159315941595159615971598159916001601160216031604160516061607160816091610161116121613161416151616161716181619162016211622162316241625162616271628162916301631163216331634163516361637163816391640164116421643164416451646164716481649165016511652165316541655165616571658165916601661166216631664166516661667166816691670167116721673167416751676167716781679168016811682168316841685168616871688168916901691169216931694169516961697169816991700170117021703170417051706170717081709171017111712171317141715171617171718 |
- #include "opencv2/core.hpp"
- #include "opencv2/core/utility.hpp"
- using cv::Size;
- using cv::Mat;
- using cv::Point;
- using cv::FileStorage;
- using cv::Rect;
- using cv::Ptr;
- using cv::FileNode;
- using cv::Mat_;
- using cv::Range;
- using cv::FileNodeIterator;
- using cv::ParallelLoopBody;
- using cv::Size;
- using cv::Mat;
- using cv::Point;
- using cv::FileStorage;
- using cv::Rect;
- using cv::Ptr;
- using cv::FileNode;
- using cv::Mat_;
- using cv::Range;
- using cv::FileNodeIterator;
- using cv::ParallelLoopBody;
- #include "boost.h"
- #include "cascadeclassifier.h"
- #include <queue>
- #include "cvconfig.h"
- using namespace std;
// Log-odds transform: maps a probability-like value in (0,1) to
// log(p/(1-p)).  The input is clamped away from 0 and 1 so the result
// stays finite even for degenerate rates.
static inline double
logRatio( double p )
{
    const double eps = 1e-5;
    if( p < eps )
        p = eps;
    else if( p > 1. - eps )
        p = 1. - eps;
    return log( p/(1. - p) );
}
// Comparison functor that orders indices by the values they address in an
// external array; passing it to std::sort produces an argsort of 'arr'.
template<typename T, typename Idx>
class LessThanIdx
{
public:
    // Borrow a pointer to the value table (not owned).
    LessThanIdx( const T* _arr ) : arr(_arr) {}

    bool operator()( Idx lhs, Idx rhs ) const
    {
        return arr[lhs] < arr[rhs];
    }

    const T* arr; // borrowed value table
};
- static inline int cvAlign( int size, int align )
- {
- CV_DbgAssert( (align & (align-1)) == 0 && size < INT_MAX );
- return (size + align - 1) & -align;
- }
// Numerical tolerance used when comparing split thresholds.
#define CV_THRESHOLD_EPS (0.00001F)

// Granularity of the tree memory-storage blocks: allocations never go
// below MinBlockSize (64 KiB) and grow in BlockSizeDelta (1 KiB) steps.
static const int MinBlockSize = 1 << 16;
static const int BlockSizeDelta = 1 << 10;
- // TODO remove this code duplication with ml/precomp.hpp
- static int CV_CDECL icvCmpIntegers( const void* a, const void* b )
- {
- return *(const int*)a - *(const int*)b;
- }
- static CvMat* cvPreprocessIndexArray( const CvMat* idx_arr, int data_arr_size, bool check_for_duplicates=false )
- {
- CvMat* idx = 0;
- CV_FUNCNAME( "cvPreprocessIndexArray" );
- __CV_BEGIN__;
- int i, idx_total, idx_selected = 0, step, type, prev = INT_MIN, is_sorted = 1;
- uchar* srcb = 0;
- int* srci = 0;
- int* dsti;
- if( !CV_IS_MAT(idx_arr) )
- CV_ERROR( CV_StsBadArg, "Invalid index array" );
- if( idx_arr->rows != 1 && idx_arr->cols != 1 )
- CV_ERROR( CV_StsBadSize, "the index array must be 1-dimensional" );
- idx_total = idx_arr->rows + idx_arr->cols - 1;
- srcb = idx_arr->data.ptr;
- srci = idx_arr->data.i;
- type = CV_MAT_TYPE(idx_arr->type);
- step = CV_IS_MAT_CONT(idx_arr->type) ? 1 : idx_arr->step/CV_ELEM_SIZE(type);
- switch( type )
- {
- case CV_8UC1:
- case CV_8SC1:
- // idx_arr is array of 1's and 0's -
- // i.e. it is a mask of the selected components
- if( idx_total != data_arr_size )
- CV_ERROR( CV_StsUnmatchedSizes,
- "Component mask should contain as many elements as the total number of input variables" );
- for( i = 0; i < idx_total; i++ )
- idx_selected += srcb[i*step] != 0;
- if( idx_selected == 0 )
- CV_ERROR( CV_StsOutOfRange, "No components/input_variables is selected!" );
- break;
- case CV_32SC1:
- // idx_arr is array of integer indices of selected components
- if( idx_total > data_arr_size )
- CV_ERROR( CV_StsOutOfRange,
- "index array may not contain more elements than the total number of input variables" );
- idx_selected = idx_total;
- // check if sorted already
- for( i = 0; i < idx_total; i++ )
- {
- int val = srci[i*step];
- if( val >= prev )
- {
- is_sorted = 0;
- break;
- }
- prev = val;
- }
- break;
- default:
- CV_ERROR( CV_StsUnsupportedFormat, "Unsupported index array data type "
- "(it should be 8uC1, 8sC1 or 32sC1)" );
- }
- CV_CALL( idx = cvCreateMat( 1, idx_selected, CV_32SC1 ));
- dsti = idx->data.i;
- if( type < CV_32SC1 )
- {
- for( i = 0; i < idx_total; i++ )
- if( srcb[i*step] )
- *dsti++ = i;
- }
- else
- {
- for( i = 0; i < idx_total; i++ )
- dsti[i] = srci[i*step];
- if( !is_sorted )
- qsort( dsti, idx_total, sizeof(dsti[0]), icvCmpIntegers );
- if( dsti[0] < 0 || dsti[idx_total-1] >= data_arr_size )
- CV_ERROR( CV_StsOutOfRange, "the index array elements are out of range" );
- if( check_for_duplicates )
- {
- for( i = 1; i < idx_total; i++ )
- if( dsti[i] <= dsti[i-1] )
- CV_ERROR( CV_StsBadArg, "There are duplicated index array elements" );
- }
- }
- __CV_END__;
- if( cvGetErrStatus() < 0 )
- cvReleaseMat( &idx );
- return idx;
- }
//----------------------------- CascadeBoostParams -------------------------------------------------

// Default parameters: Gentle AdaBoost with a 99.5% minimal hit rate and a
// 50% maximal false-alarm rate per stage; surrogate splits and pruning
// options are disabled.
CvCascadeBoostParams::CvCascadeBoostParams() : minHitRate( 0.995F), maxFalseAlarm( 0.5F )
{
    boost_type = CvBoost::GENTLE;
    use_surrogates = use_1se_rule = truncate_pruned_tree = false;
}
// Full-parameter constructor.  Weak count, trim rate and depth are forwarded
// to the CvBoostParams base; the hit-rate / false-alarm bounds are
// cascade-specific.
// NOTE(review): boost_type is unconditionally reset to GENTLE in the body,
// so the _boostType argument only reaches the base class and is then
// overridden here -- confirm this is the intended behavior.
CvCascadeBoostParams::CvCascadeBoostParams( int _boostType,
        float _minHitRate, float _maxFalseAlarm,
        double _weightTrimRate, int _maxDepth, int _maxWeakCount ) :
    CvBoostParams( _boostType, _maxWeakCount, _weightTrimRate, _maxDepth, false, 0 )
{
    boost_type = CvBoost::GENTLE;
    minHitRate = _minHitRate;
    maxFalseAlarm = _maxFalseAlarm;
    use_surrogates = use_1se_rule = truncate_pruned_tree = false;
}
- void CvCascadeBoostParams::write( FileStorage &fs ) const
- {
- string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
- boost_type == CvBoost::REAL ? CC_REAL_BOOST :
- boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
- boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string();
- CV_Assert( !boostTypeStr.empty() );
- fs << CC_BOOST_TYPE << boostTypeStr;
- fs << CC_MINHITRATE << minHitRate;
- fs << CC_MAXFALSEALARM << maxFalseAlarm;
- fs << CC_TRIM_RATE << weight_trim_rate;
- fs << CC_MAX_DEPTH << max_depth;
- fs << CC_WEAK_COUNT << weak_count;
- }
- bool CvCascadeBoostParams::read( const FileNode &node )
- {
- string boostTypeStr;
- FileNode rnode = node[CC_BOOST_TYPE];
- rnode >> boostTypeStr;
- boost_type = !boostTypeStr.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
- !boostTypeStr.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
- !boostTypeStr.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
- !boostTypeStr.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
- if (boost_type == -1)
- CV_Error( CV_StsBadArg, "unsupported Boost type" );
- node[CC_MINHITRATE] >> minHitRate;
- node[CC_MAXFALSEALARM] >> maxFalseAlarm;
- node[CC_TRIM_RATE] >> weight_trim_rate ;
- node[CC_MAX_DEPTH] >> max_depth ;
- node[CC_WEAK_COUNT] >> weak_count ;
- if ( minHitRate <= 0 || minHitRate > 1 ||
- maxFalseAlarm <= 0 || maxFalseAlarm > 1 ||
- weight_trim_rate <= 0 || weight_trim_rate > 1 ||
- max_depth <= 0 || weak_count <= 0 )
- CV_Error( CV_StsBadArg, "bad parameters range");
- return true;
- }
- void CvCascadeBoostParams::printDefaults() const
- {
- cout << "--boostParams--" << endl;
- cout << " [-bt <{" << CC_DISCRETE_BOOST << ", "
- << CC_REAL_BOOST << ", "
- << CC_LOGIT_BOOST ", "
- << CC_GENTLE_BOOST << "(default)}>]" << endl;
- cout << " [-minHitRate <min_hit_rate> = " << minHitRate << ">]" << endl;
- cout << " [-maxFalseAlarmRate <max_false_alarm_rate = " << maxFalseAlarm << ">]" << endl;
- cout << " [-weightTrimRate <weight_trim_rate = " << weight_trim_rate << ">]" << endl;
- cout << " [-maxDepth <max_depth_of_weak_tree = " << max_depth << ">]" << endl;
- cout << " [-maxWeakCount <max_weak_tree_count = " << weak_count << ">]" << endl;
- }
- void CvCascadeBoostParams::printAttrs() const
- {
- string boostTypeStr = boost_type == CvBoost::DISCRETE ? CC_DISCRETE_BOOST :
- boost_type == CvBoost::REAL ? CC_REAL_BOOST :
- boost_type == CvBoost::LOGIT ? CC_LOGIT_BOOST :
- boost_type == CvBoost::GENTLE ? CC_GENTLE_BOOST : string();
- CV_Assert( !boostTypeStr.empty() );
- cout << "boostType: " << boostTypeStr << endl;
- cout << "minHitRate: " << minHitRate << endl;
- cout << "maxFalseAlarmRate: " << maxFalseAlarm << endl;
- cout << "weightTrimRate: " << weight_trim_rate << endl;
- cout << "maxDepth: " << max_depth << endl;
- cout << "maxWeakCount: " << weak_count << endl;
- }
- bool CvCascadeBoostParams::scanAttr( const string prmName, const string val)
- {
- bool res = true;
- if( !prmName.compare( "-bt" ) )
- {
- boost_type = !val.compare( CC_DISCRETE_BOOST ) ? CvBoost::DISCRETE :
- !val.compare( CC_REAL_BOOST ) ? CvBoost::REAL :
- !val.compare( CC_LOGIT_BOOST ) ? CvBoost::LOGIT :
- !val.compare( CC_GENTLE_BOOST ) ? CvBoost::GENTLE : -1;
- if (boost_type == -1)
- res = false;
- }
- else if( !prmName.compare( "-minHitRate" ) )
- {
- minHitRate = (float) atof( val.c_str() );
- }
- else if( !prmName.compare( "-maxFalseAlarmRate" ) )
- {
- maxFalseAlarm = (float) atof( val.c_str() );
- }
- else if( !prmName.compare( "-weightTrimRate" ) )
- {
- weight_trim_rate = (float) atof( val.c_str() );
- }
- else if( !prmName.compare( "-maxDepth" ) )
- {
- max_depth = atoi( val.c_str() );
- }
- else if( !prmName.compare( "-maxWeakCount" ) )
- {
- weak_count = atoi( val.c_str() );
- }
- else
- res = false;
- return res;
- }
// Build the root node for training on the subset of samples selected by
// _subsample_idx (mask or explicit index array; 0 selects everything).
// When the subset is the identity permutation the existing data_root is
// shallow-copied; otherwise the presorted index buffers, cv-labels and
// sample indices are re-sampled into the new root's sub-buffer.
CvDTreeNode* CvCascadeBoostTrainData::subsample_data( const CvMat* _subsample_idx )
{
    CvDTreeNode* root = 0;
    CvMat* isubsample_idx = 0;
    CvMat* subsample_co = 0;

    bool isMakeRootCopy = true;

    if( !data_root )
        CV_Error( CV_StsError, "No training data has been set" );

    if( _subsample_idx )
    {
        CV_Assert( (isubsample_idx = cvPreprocessIndexArray( _subsample_idx, sample_count )) != 0 );

        // the copy shortcut applies only when every sample is selected
        // exactly once and in natural order
        if( isubsample_idx->cols + isubsample_idx->rows - 1 == sample_count )
        {
            const int* sidx = isubsample_idx->data.i;
            for( int i = 0; i < sample_count; i++ )
            {
                if( sidx[i] != i )
                {
                    isMakeRootCopy = false;
                    break;
                }
            }
        }
        else
            isMakeRootCopy = false;
    }

    if( isMakeRootCopy )
    {
        // make a copy of the root node
        CvDTreeNode temp;
        int i;
        root = new_node( 0, 1, 0, 0 );
        temp = *root;
        *root = *data_root;
        // keep the freshly allocated num_valid array, copy the contents
        root->num_valid = temp.num_valid;
        if( root->num_valid )
        {
            for( i = 0; i < var_count; i++ )
                root->num_valid[i] = data_root->num_valid[i];
        }
        // preserve the new node's own cross-validation bookkeeping
        root->cv_Tn = temp.cv_Tn;
        root->cv_node_risk = temp.cv_node_risk;
        root->cv_node_error = temp.cv_node_error;
    }
    else
    {
        int* sidx = isubsample_idx->data.i;
        // co - array of count/offset pairs (to handle duplicated values in _subsample_idx)
        int* co, cur_ofs = 0;
        int workVarCount = get_work_var_count();
        int count = isubsample_idx->rows + isubsample_idx->cols - 1;

        root = new_node( 0, count, 1, 0 );

        // co[2i] = how often original sample i is selected,
        // co[2i+1] = offset of its first slot in the new ordering (-1 if unused)
        CV_Assert( (subsample_co = cvCreateMat( 1, sample_count*2, CV_32SC1 )) != 0);
        cvZero( subsample_co );
        co = subsample_co->data.i;
        for( int i = 0; i < count; i++ )
            co[sidx[i]*2]++;
        for( int i = 0; i < sample_count; i++ )
        {
            if( co[i*2] )
            {
                co[i*2+1] = cur_ofs;
                cur_ofs += co[i*2];
            }
            else
                co[i*2+1] = -1;
        }

        cv::AutoBuffer<uchar> inn_buf(sample_count*(2*sizeof(int) + sizeof(float)));
        // subsample ordered variables
        for( int vi = 0; vi < numPrecalcIdx; vi++ )
        {
            int ci = get_var_type(vi);
            CV_Assert( ci < 0 );  // must be an ordered variable

            // scratch layout inside inn_buf: [indices | values | sample indices]
            int *src_idx_buf = (int*)inn_buf.data();
            float *src_val_buf = (float*)(src_idx_buf + sample_count);
            int* sample_indices_buf = (int*)(src_val_buf + sample_count);
            const int* src_idx = 0;
            const float* src_val = 0;
            get_ord_var_data( data_root, vi, src_val_buf, src_idx_buf, &src_val, &src_idx, sample_indices_buf );

            int j = 0, idx, count_i;
            int num_valid = data_root->get_num_valid(vi);
            CV_Assert( num_valid == sample_count );

            if (is_buf_16u)
            {
                // NOTE(review): this branch offsets by data_root->offset while
                // the 32-bit branch below uses root->offset -- confirm the
                // asymmetry is intended.
                unsigned short* udst_idx = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
                    (size_t)vi*sample_count + data_root->offset);
                for( int i = 0; i < num_valid; i++ )
                {
                    idx = src_idx[i];
                    count_i = co[idx*2];
                    if( count_i )
                        // emit one slot per duplicate, preserving sorted order
                        for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
                            udst_idx[j] = (unsigned short)cur_ofs;
                }
            }
            else
            {
                int* idst_idx = buf->data.i + root->buf_idx*get_length_subbuf() +
                    (size_t)vi*sample_count + root->offset;
                for( int i = 0; i < num_valid; i++ )
                {
                    idx = src_idx[i];
                    count_i = co[idx*2];
                    if( count_i )
                        for( cur_ofs = co[idx*2+1]; count_i > 0; count_i--, j++, cur_ofs++ )
                            idst_idx[j] = cur_ofs;
                }
            }
        }

        // subsample cv_lables
        const int* src_lbls = get_cv_labels(data_root, (int*)inn_buf.data());
        if (is_buf_16u)
        {
            unsigned short* udst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
                (size_t)(workVarCount-1)*sample_count + root->offset);
            for( int i = 0; i < count; i++ )
                udst[i] = (unsigned short)src_lbls[sidx[i]];
        }
        else
        {
            int* idst = buf->data.i + root->buf_idx*get_length_subbuf() +
                (size_t)(workVarCount-1)*sample_count + root->offset;
            for( int i = 0; i < count; i++ )
                idst[i] = src_lbls[sidx[i]];
        }

        // subsample sample_indices
        const int* sample_idx_src = get_sample_indices(data_root, (int*)inn_buf.data());
        if (is_buf_16u)
        {
            unsigned short* sample_idx_dst = (unsigned short*)(buf->data.s + root->buf_idx*get_length_subbuf() +
                (size_t)workVarCount*sample_count + root->offset);
            for( int i = 0; i < count; i++ )
                sample_idx_dst[i] = (unsigned short)sample_idx_src[sidx[i]];
        }
        else
        {
            int* sample_idx_dst = buf->data.i + root->buf_idx*get_length_subbuf() +
                (size_t)workVarCount*sample_count + root->offset;
            for( int i = 0; i < count; i++ )
                sample_idx_dst[i] = sample_idx_src[sidx[i]];
        }

        // every variable now has 'count' valid samples in the new root
        for( int vi = 0; vi < var_count; vi++ )
            root->set_num_valid(vi, count);
    }

    cvReleaseMat( &isubsample_idx );
    cvReleaseMat( &subsample_co );

    return root;
}
//---------------------------- CascadeBoostTrainData -----------------------------

// Lightweight constructor: sets up the variable-type table and the tree
// storage only; no per-sample buffers are allocated (the other constructor
// calls setData() for actual training).
CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
                                                  const CvDTreeParams& _params )
{
    is_classifier = true;
    var_all = var_count = (int)_featureEvaluator->getNumFeatures();

    featureEvaluator = _featureEvaluator;
    shared = true;
    set_params( _params );
    max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
    var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );
    if ( featureEvaluator->getMaxCatCount() > 0 )
    {
        // categorical features: variable vi keeps its own (non-negative) index
        numPrecalcIdx = 0;
        cat_var_count = var_count;
        ord_var_count = 0;
        for( int vi = 0; vi < var_count; vi++ )
        {
            var_type->data.i[vi] = vi;
        }
    }
    else
    {
        // ordered features are encoded with negative codes (-1..-var_count)
        cat_var_count = 0;
        ord_var_count = var_count;
        for( int vi = 1; vi <= var_count; vi++ )
        {
            var_type->data.i[vi-1] = -vi;
        }
    }
    // two extra slots for the cv-labels and sample-index pseudo-variables
    var_type->data.i[var_count] = cat_var_count;
    var_type->data.i[var_count+1] = cat_var_count+1;

    int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) + (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
    int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
    treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
    tree_storage = cvCreateMemStorage( treeBlockSize );
    node_heap = cvCreateSet( 0, sizeof(node_heap[0]), sizeof(CvDTreeNode), tree_storage );
    split_heap = cvCreateSet( 0, sizeof(split_heap[0]), maxSplitSize, tree_storage );
}
// Full constructor: delegates all buffer allocation and precalculation to
// setData().  Buffer sizes are given in megabytes.
CvCascadeBoostTrainData::CvCascadeBoostTrainData( const CvFeatureEvaluator* _featureEvaluator,
                                                  int _numSamples,
                                                  int _precalcValBufSize, int _precalcIdxBufSize,
                                                  const CvDTreeParams& _params )
{
    setData( _featureEvaluator, _numSamples, _precalcValBufSize, _precalcIdxBufSize, _params );
}
// (Re)initialize the training data for _numSamples samples described by
// _featureEvaluator.  _precalcValBufSize and _precalcIdxBufSize are memory
// budgets (in MB) for precalculated feature values and for presorted
// feature-index tables respectively.
void CvCascadeBoostTrainData::setData( const CvFeatureEvaluator* _featureEvaluator,
                                       int _numSamples,
                                       int _precalcValBufSize, int _precalcIdxBufSize,
                                       const CvDTreeParams& _params )
{
    int* idst = 0;
    unsigned short* udst = 0;

    uint64 effective_buf_size = 0;
    int effective_buf_height = 0, effective_buf_width = 0;

    clear();
    shared = true;
    have_labels = true;
    have_priors = false;
    is_classifier = true;

    rng = &cv::theRNG();

    set_params( _params );

    CV_Assert( _featureEvaluator );
    featureEvaluator = _featureEvaluator;

    max_c_count = MAX( 2, featureEvaluator->getMaxCatCount() );
    _resp = cvMat(featureEvaluator->getCls());
    responses = &_resp;
    // TODO: check responses: elements must be 0 or 1

    if( _precalcValBufSize < 0 || _precalcIdxBufSize < 0)
        CV_Error( CV_StsOutOfRange, "_numPrecalcVal and _numPrecalcIdx must be positive or 0" );

    var_count = var_all = featureEvaluator->getNumFeatures() * featureEvaluator->getFeatureSize();
    sample_count = _numSamples;

    // 16-bit index buffers suffice when every sample index fits in ushort
    is_buf_16u = false;
    if (sample_count < 65536)
        is_buf_16u = true;

    // how many features fit into each budget (MB -> bytes, per-sample cost)
    numPrecalcVal = min( cvRound((double)_precalcValBufSize*1048576. / (sizeof(float)*sample_count)), var_count );
    numPrecalcIdx = min( cvRound((double)_precalcIdxBufSize*1048576. /
                                 ((is_buf_16u ? sizeof(unsigned short) : sizeof (int))*sample_count)), var_count );

    assert( numPrecalcIdx >= 0 && numPrecalcVal >= 0 );

    valCache.create( numPrecalcVal, sample_count, CV_32FC1 );
    var_type = cvCreateMat( 1, var_count + 2, CV_32SC1 );

    if ( featureEvaluator->getMaxCatCount() > 0 )
    {
        // categorical features: presorted index tables are not used
        numPrecalcIdx = 0;
        cat_var_count = var_count;
        ord_var_count = 0;
        for( int vi = 0; vi < var_count; vi++ )
        {
            var_type->data.i[vi] = vi;
        }
    }
    else
    {
        // ordered features are encoded with negative codes (-1..-var_count)
        cat_var_count = 0;
        ord_var_count = var_count;
        for( int vi = 1; vi <= var_count; vi++ )
        {
            var_type->data.i[vi-1] = -vi;
        }
    }
    // two extra slots for the cv-labels and sample-index pseudo-variables
    var_type->data.i[var_count] = cat_var_count;
    var_type->data.i[var_count+1] = cat_var_count+1;
    work_var_count = ( cat_var_count ? 0 : numPrecalcIdx ) + 1/*cv_lables*/;
    buf_count = 2;

    buf_size = -1; // the member buf_size is obsolete

    effective_buf_size = (uint64)(work_var_count + 1)*(uint64)sample_count * buf_count; // this is the total size of "CvMat buf" to be allocated
    effective_buf_width = sample_count;
    effective_buf_height = work_var_count+1;

    // fold the double buffering into whichever dimension keeps both within int range
    if (effective_buf_width >= effective_buf_height)
        effective_buf_height *= buf_count;
    else
        effective_buf_width *= buf_count;

    // overflow guard: the 2-D extents must reproduce the 64-bit element count
    if ((uint64)effective_buf_width * (uint64)effective_buf_height != effective_buf_size)
    {
        CV_Error(CV_StsBadArg, "The memory buffer cannot be allocated since its size exceeds integer fields limit");
    }

    if ( is_buf_16u )
        buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_16UC1 );
    else
        buf = cvCreateMat( effective_buf_height, effective_buf_width, CV_32SC1 );

    cat_count = cvCreateMat( 1, cat_var_count + 1, CV_32SC1 );

    // precalculate valCache and set indices in buf
    precalculate();

    // now calculate the maximum size of split,
    // create memory storage that will keep nodes and splits of the decision tree
    // allocate root node and the buffer for the whole training data
    int maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
        (MAX(0,sample_count - 33)/32)*sizeof(int),sizeof(void*));
    int treeBlockSize = MAX((int)sizeof(CvDTreeNode)*8, maxSplitSize);
    treeBlockSize = MAX(treeBlockSize + BlockSizeDelta, MinBlockSize);
    tree_storage = cvCreateMemStorage( treeBlockSize );
    node_heap = cvCreateSet( 0, sizeof(*node_heap), sizeof(CvDTreeNode), tree_storage );

    int nvSize = var_count*sizeof(int);
    nvSize = cvAlign(MAX( nvSize, (int)sizeof(CvSetElem) ), sizeof(void*));
    int tempBlockSize = nvSize;
    tempBlockSize = MAX( tempBlockSize + BlockSizeDelta, MinBlockSize );
    temp_storage = cvCreateMemStorage( tempBlockSize );
    nv_heap = cvCreateSet( 0, sizeof(*nv_heap), nvSize, temp_storage );

    data_root = new_node( 0, sample_count, 0, 0 );

    // set sample labels
    if (is_buf_16u)
        udst = (unsigned short*)(buf->data.s + (size_t)work_var_count*sample_count);
    else
        idst = buf->data.i + (size_t)work_var_count*sample_count;

    // the sample-index pseudo-variable starts out as the identity mapping
    for (int si = 0; si < sample_count; si++)
    {
        if (udst)
            udst[si] = (unsigned short)si;
        else
            idst[si] = si;
    }
    for( int vi = 0; vi < var_count; vi++ )
        data_root->set_num_valid(vi, sample_count);
    for( int vi = 0; vi < cat_var_count; vi++ )
        cat_count->data.i[vi] = max_c_count;

    cat_count->data.i[cat_var_count] = 2;  // binary response

    maxSplitSize = cvAlign(sizeof(CvDTreeSplit) +
        (MAX(0,max_c_count - 33)/32)*sizeof(int),sizeof(void*));
    split_heap = cvCreateSet( 0, sizeof(*split_heap), maxSplitSize, tree_storage );

    // uniform class priors and auxiliary per-sample work matrices
    priors = cvCreateMat( 1, get_num_classes(), CV_64F );
    cvSet(priors, cvScalar(1));
    priors_mult = cvCloneMat( priors );
    counts = cvCreateMat( 1, get_num_classes(), CV_32SC1 );
    direction = cvCreateMat( 1, sample_count, CV_8UC1 );
    split_buf = cvCreateMat( 1, sample_count, CV_32SC1 );//TODO: make a pointer
}
// Release per-training buffers: the base-class training buffers plus the
// cached feature-value matrix.
void CvCascadeBoostTrainData::free_train_data()
{
    CvDTreeTrainData::free_train_data();
    valCache.release();
}
- const int* CvCascadeBoostTrainData::get_class_labels( CvDTreeNode* n, int* labelsBuf)
- {
- int nodeSampleCount = n->sample_count;
- int rStep = CV_IS_MAT_CONT( responses->type ) ? 1 : responses->step / CV_ELEM_SIZE( responses->type );
- int* sampleIndicesBuf = labelsBuf; //
- const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
- for( int si = 0; si < nodeSampleCount; si++ )
- {
- int sidx = sampleIndices[si];
- labelsBuf[si] = (int)responses->data.fl[sidx*rStep];
- }
- return labelsBuf;
- }
// Sample indices are stored as the last pseudo-variable (index
// get_work_var_count()) in the shared buffer.
const int* CvCascadeBoostTrainData::get_sample_indices( CvDTreeNode* n, int* indicesBuf )
{
    return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count(), indicesBuf );
}
// Cross-validation labels occupy the next-to-last pseudo-variable
// (index get_work_var_count() - 1) in the shared buffer.
const int* CvCascadeBoostTrainData::get_cv_labels( CvDTreeNode* n, int* labels_buf )
{
    return CvDTreeTrainData::get_cat_var_data( n, get_work_var_count() - 1, labels_buf );
}
// Produce, for ordered variable vi, the node's sample values sorted
// ascending (*ordValues) plus the corresponding within-node sample order
// (*sortedIndices).  For vi < numPrecalcIdx the order is read from the
// presorted buffers; otherwise it is computed here.  Values come from
// valCache when vi < numPrecalcVal, else from the feature evaluator.
void CvCascadeBoostTrainData::get_ord_var_data( CvDTreeNode* n, int vi, float* ordValuesBuf, int* sortedIndicesBuf,
        const float** ordValues, const int** sortedIndices, int* sampleIndicesBuf )
{
    int nodeSampleCount = n->sample_count;
    const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);

    if ( vi < numPrecalcIdx )
    {
        // presorted order lives in this node's sub-buffer
        if( !is_buf_16u )
            *sortedIndices = buf->data.i + n->buf_idx*get_length_subbuf() + (size_t)vi*sample_count + n->offset;
        else
        {
            // widen the 16-bit indices into the caller's int buffer
            const unsigned short* shortIndices = (const unsigned short*)(buf->data.s + n->buf_idx*get_length_subbuf() +
                                                                         (size_t)vi*sample_count + n->offset );
            for( int i = 0; i < nodeSampleCount; i++ )
                sortedIndicesBuf[i] = shortIndices[i];

            *sortedIndices = sortedIndicesBuf;
        }

        if( vi < numPrecalcVal )
        {
            // gather cached values in sorted order
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                int idx = (*sortedIndices)[i];
                idx = sampleIndices[idx];
                ordValuesBuf[i] = valCache.at<float>( vi, idx);
            }
        }
        else
        {
            // compute values on the fly, in sorted order
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                int idx = (*sortedIndices)[i];
                idx = sampleIndices[idx];
                ordValuesBuf[i] = (*featureEvaluator)( vi, idx);
            }
        }
    }
    else // vi >= numPrecalcIdx
    {
        cv::AutoBuffer<float> abuf(nodeSampleCount);
        float* sampleValues = &abuf[0];

        if ( vi < numPrecalcVal )
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                sortedIndicesBuf[i] = i;
                sampleValues[i] = valCache.at<float>( vi, sampleIndices[i] );
            }
        }
        else
        {
            for( int i = 0; i < nodeSampleCount; i++ )
            {
                sortedIndicesBuf[i] = i;
                sampleValues[i] = (*featureEvaluator)( vi, sampleIndices[i]);
            }
        }
        // argsort the freshly gathered values
        std::sort(sortedIndicesBuf, sortedIndicesBuf + nodeSampleCount, LessThanIdx<float, int>(&sampleValues[0]) );
        for( int i = 0; i < nodeSampleCount; i++ )
            ordValuesBuf[i] = (&sampleValues[0])[sortedIndicesBuf[i]];
        *sortedIndices = sortedIndicesBuf;
    }

    *ordValues = ordValuesBuf;
}
- const int* CvCascadeBoostTrainData::get_cat_var_data( CvDTreeNode* n, int vi, int* catValuesBuf )
- {
- int nodeSampleCount = n->sample_count;
- int* sampleIndicesBuf = catValuesBuf; //
- const int* sampleIndices = get_sample_indices(n, sampleIndicesBuf);
- if ( vi < numPrecalcVal )
- {
- for( int i = 0; i < nodeSampleCount; i++ )
- catValuesBuf[i] = (int) valCache.at<float>( vi, sampleIndices[i]);
- }
- else
- {
- if( vi >= numPrecalcVal && vi < var_count )
- {
- for( int i = 0; i < nodeSampleCount; i++ )
- catValuesBuf[i] = (int)(*featureEvaluator)( vi, sampleIndices[i] );
- }
- else
- {
- get_cv_labels( n, catValuesBuf );
- }
- }
- return catValuesBuf;
- }
- float CvCascadeBoostTrainData::getVarValue( int vi, int si )
- {
- if ( vi < numPrecalcVal && !valCache.empty() )
- return valCache.at<float>( vi, si );
- return (*featureEvaluator)( vi, si );
- }
- struct FeatureIdxOnlyPrecalc : ParallelLoopBody
- {
- FeatureIdxOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, int _sample_count, bool _is_buf_16u )
- {
- featureEvaluator = _featureEvaluator;
- sample_count = _sample_count;
- udst = (unsigned short*)_buf->data.s;
- idst = _buf->data.i;
- is_buf_16u = _is_buf_16u;
- }
- void operator()( const Range& range ) const
- {
- cv::AutoBuffer<float> valCache(sample_count);
- float* valCachePtr = valCache.data();
- for ( int fi = range.start; fi < range.end; fi++)
- {
- for( int si = 0; si < sample_count; si++ )
- {
- valCachePtr[si] = (*featureEvaluator)( fi, si );
- if ( is_buf_16u )
- *(udst + (size_t)fi*sample_count + si) = (unsigned short)si;
- else
- *(idst + (size_t)fi*sample_count + si) = si;
- }
- if ( is_buf_16u )
- std::sort(udst + (size_t)fi*sample_count, udst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, unsigned short>(valCachePtr) );
- else
- std::sort(idst + (size_t)fi*sample_count, idst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, int>(valCachePtr) );
- }
- }
- const CvFeatureEvaluator* featureEvaluator;
- int sample_count;
- int* idst;
- unsigned short* udst;
- bool is_buf_16u;
- };
- struct FeatureValAndIdxPrecalc : ParallelLoopBody
- {
- FeatureValAndIdxPrecalc( const CvFeatureEvaluator* _featureEvaluator, CvMat* _buf, Mat* _valCache, int _sample_count, bool _is_buf_16u )
- {
- featureEvaluator = _featureEvaluator;
- valCache = _valCache;
- sample_count = _sample_count;
- udst = (unsigned short*)_buf->data.s;
- idst = _buf->data.i;
- is_buf_16u = _is_buf_16u;
- }
- void operator()( const Range& range ) const
- {
- for ( int fi = range.start; fi < range.end; fi++)
- {
- for( int si = 0; si < sample_count; si++ )
- {
- valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
- if ( is_buf_16u )
- *(udst + (size_t)fi*sample_count + si) = (unsigned short)si;
- else
- *(idst + (size_t)fi*sample_count + si) = si;
- }
- if ( is_buf_16u )
- std::sort(udst + (size_t)fi*sample_count, udst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, unsigned short>(valCache->ptr<float>(fi)) );
- else
- std::sort(idst + (size_t)fi*sample_count, idst + (size_t)(fi + 1)*sample_count, LessThanIdx<float, int>(valCache->ptr<float>(fi)) );
- }
- }
- const CvFeatureEvaluator* featureEvaluator;
- Mat* valCache;
- int sample_count;
- int* idst;
- unsigned short* udst;
- bool is_buf_16u;
- };
- // Parallel body that only caches raw feature values (no sorted-index
- // precalculation) for the feature range it is given.
- struct FeatureValOnlyPrecalc : ParallelLoopBody
- {
- FeatureValOnlyPrecalc( const CvFeatureEvaluator* _featureEvaluator, Mat* _valCache, int _sample_count )
- {
- featureEvaluator = _featureEvaluator;
- valCache = _valCache;
- sample_count = _sample_count;
- }
- void operator()( const Range& range ) const
- {
- // Each feature row of valCache is written by exactly one range element,
- // so concurrent invocations do not overlap.
- for ( int fi = range.start; fi < range.end; fi++)
- for( int si = 0; si < sample_count; si++ )
- valCache->at<float>(fi,si) = (*featureEvaluator)( fi, si );
- }
- const CvFeatureEvaluator* featureEvaluator;
- Mat* valCache;
- int sample_count;
- };
- // Precomputes feature values and/or sorted sample-index tables before boosting,
- // split into three parallel passes over disjoint feature ranges:
- //   [numPrecalcVal, numPrecalcIdx) : sorted indices only (values not cached);
- //   [0, min(numPrecalcVal, numPrecalcIdx)) : both values and sorted indices;
- //   [minNum, numPrecalcVal) : values only.
- // NOTE(review): each Range is empty when its start >= end, so the relative
- // sizes of numPrecalcVal/numPrecalcIdx select which passes actually run.
- void CvCascadeBoostTrainData::precalculate()
- {
- int minNum = MIN( numPrecalcVal, numPrecalcIdx);
- double proctime = -TIME( 0 );
- parallel_for_( Range(numPrecalcVal, numPrecalcIdx),
- FeatureIdxOnlyPrecalc(featureEvaluator, buf, sample_count, is_buf_16u!=0) );
- parallel_for_( Range(0, minNum),
- FeatureValAndIdxPrecalc(featureEvaluator, buf, &valCache, sample_count, is_buf_16u!=0) );
- parallel_for_( Range(minNum, numPrecalcVal),
- FeatureValOnlyPrecalc(featureEvaluator, &valCache, sample_count) );
- cout << "Precalculation time: " << (proctime + TIME( 0 )) << endl;
- }
- //-------------------------------- CascadeBoostTree ----------------------------------------
- // Routes sample sampleIdx from the root down to a leaf and returns that leaf.
- // Ordered features compare the value against the split threshold; categorical
- // features consult the split's bit subset. Raises CV_StsError if untrained.
- CvDTreeNode* CvCascadeBoostTree::predict( int sampleIdx ) const
- {
- CvDTreeNode* node = root;
- if( !node )
- CV_Error( CV_StsError, "The tree has not been trained yet" );
- if ( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount() == 0 ) // ordered
- {
- while( node->left )
- {
- CvDTreeSplit* split = node->split;
- float val = ((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
- // Go left when the value is at or below the split threshold.
- node = val <= split->ord.c ? node->left : node->right;
- }
- }
- else // categorical
- {
- while( node->left )
- {
- CvDTreeSplit* split = node->split;
- int c = (int)((CvCascadeBoostTrainData*)data)->getVarValue( split->var_idx, sampleIdx );
- // CV_DTREE_CAT_DIR looks category c up in the split's bitmask.
- node = CV_DTREE_CAT_DIR(c, split->subset) < 0 ? node->left : node->right;
- }
- }
- return node;
- }
- // Serializes the tree to FileStorage in breadth-first order. Internal nodes
- // are emitted as child references plus the split description; child refs use
- // positive indices for internal nodes and non-positive indices for leaves
- // (leaf k is stored as -k, with its value appended to the leafVals list).
- // featureMap, when non-empty, remaps feature indices to the pruned feature set.
- void CvCascadeBoostTree::write( FileStorage &fs, const Mat& featureMap )
- {
- int maxCatCount = ((CvCascadeBoostTrainData*)data)->featureEvaluator->getMaxCatCount();
- int subsetN = (maxCatCount + 31)/32;
- queue<CvDTreeNode*> internalNodesQueue;
- // Upper bound on leaf count for a tree of this depth.
- int size = (int)pow( 2.f, (float)ensemble->get_params().max_depth);
- std::vector<float> leafVals(size);
- int leafValIdx = 0;
- int internalNodeIdx = 1;
- CvDTreeNode* tempNode;
- CV_DbgAssert( root );
- internalNodesQueue.push( root );
- fs << "{";
- fs << CC_INTERNAL_NODES << "[:";
- while (!internalNodesQueue.empty())
- {
- tempNode = internalNodesQueue.front();
- CV_Assert( tempNode->left );
- if ( !tempNode->left->left && !tempNode->left->right) // left node is leaf
- {
- // leafValIdx is non-positive; -leafValIdx indexes into leafVals.
- leafVals[-leafValIdx] = (float)tempNode->left->value;
- fs << leafValIdx-- ;
- }
- else
- {
- internalNodesQueue.push( tempNode->left );
- fs << internalNodeIdx++;
- }
- CV_Assert( tempNode->right );
- if ( !tempNode->right->left && !tempNode->right->right) // right node is leaf
- {
- leafVals[-leafValIdx] = (float)tempNode->right->value;
- fs << leafValIdx--;
- }
- else
- {
- internalNodesQueue.push( tempNode->right );
- fs << internalNodeIdx++;
- }
- int fidx = tempNode->split->var_idx;
- // Remap the feature index if a feature map is supplied.
- fidx = featureMap.empty() ? fidx : featureMap.at<int>(0, fidx);
- fs << fidx;
- if ( !maxCatCount )
- fs << tempNode->split->ord.c;
- else
- for( int i = 0; i < subsetN; i++ )
- fs << tempNode->split->subset[i];
- internalNodesQueue.pop();
- }
- fs << "]"; // CC_INTERNAL_NODES
- fs << CC_LEAF_VALUES << "[:";
- for (int ni = 0; ni < -leafValIdx; ni++)
- fs << leafVals[ni];
- fs << "]"; // CC_LEAF_VALUES
- fs << "}";
- }
- // Deserializes a tree written by CvCascadeBoostTree::write. Nodes were stored
- // breadth-first, so this reads the flat node list BACKWARDS, reconstructing
- // parents from already-built children kept in a queue; non-positive child
- // indices denote leaves whose values are popped (also backwards) from the
- // leaf-value list. `step` is the per-node field count in the flat encoding.
- void CvCascadeBoostTree::read( const FileNode &node, CvBoost* _ensemble,
- CvDTreeTrainData* _data )
- {
- int maxCatCount = ((CvCascadeBoostTrainData*)_data)->featureEvaluator->getMaxCatCount();
- int subsetN = (maxCatCount + 31)/32;
- // 2 child refs + 1 feature index + either subsetN subset words or 1 threshold.
- int step = 3 + ( maxCatCount>0 ? subsetN : 1 );
- queue<CvDTreeNode*> internalNodesQueue;
- int internalNodesIdx, leafValsuesIdx;
- CvDTreeNode* prntNode, *cldNode;
- clear();
- data = _data;
- ensemble = _ensemble;
- pruned_tree_idx = 0;
- // read tree nodes
- FileNode rnode = node[CC_INTERNAL_NODES];
- internalNodesIdx = (int) rnode.size() - 1;
- FileNode lnode = node[CC_LEAF_VALUES];
- leafValsuesIdx = (int) lnode.size() - 1;
- for( size_t i = 0; i < rnode.size()/step; i++ )
- {
- prntNode = data->new_node( 0, 0, 0, 0 );
- if ( maxCatCount > 0 )
- {
- // Categorical split: read the subset bitmask words in reverse.
- prntNode->split = data->new_split_cat( 0, 0 );
- for( int j = subsetN-1; j>=0; j--)
- {
- rnode[internalNodesIdx] >> prntNode->split->subset[j]; --internalNodesIdx;
- }
- }
- else
- {
- // Ordered split: read the threshold.
- float split_value;
- rnode[internalNodesIdx] >> split_value; --internalNodesIdx;
- prntNode->split = data->new_split_ord( 0, split_value, 0, 0, 0);
- }
- rnode[internalNodesIdx] >> prntNode->split->var_idx; --internalNodesIdx;
- int ridx, lidx;
- rnode[internalNodesIdx] >> ridx; --internalNodesIdx;
- rnode[internalNodesIdx] >> lidx; --internalNodesIdx;
- if ( ridx <= 0)
- {
- // Non-positive index: right child is a leaf; take the next leaf value.
- prntNode->right = cldNode = data->new_node( 0, 0, 0, 0 );
- lnode[leafValsuesIdx] >> cldNode->value; --leafValsuesIdx;
- cldNode->parent = prntNode;
- }
- else
- {
- // Positive index: right child is an internal node built earlier.
- prntNode->right = internalNodesQueue.front();
- prntNode->right->parent = prntNode;
- internalNodesQueue.pop();
- }
- if ( lidx <= 0)
- {
- prntNode->left = cldNode = data->new_node( 0, 0, 0, 0 );
- lnode[leafValsuesIdx] >> cldNode->value; --leafValsuesIdx;
- cldNode->parent = prntNode;
- }
- else
- {
- prntNode->left = internalNodesQueue.front();
- prntNode->left->parent = prntNode;
- internalNodesQueue.pop();
- }
- internalNodesQueue.push( prntNode );
- }
- // The last node assembled is the root.
- root = internalNodesQueue.front();
- internalNodesQueue.pop();
- }
- // Partitions the parent node's per-sample bookkeeping into the two children
- // according to the direction array (dir[i]==0 -> left, !=0 -> right), keeping
- // precalculated sorted-index rows sorted within each half. Handles both the
- // 16-bit and 32-bit index buffer layouts. Statement order here mirrors the
- // buffer layout exactly; do not reorder.
- void CvCascadeBoostTree::split_node_data( CvDTreeNode* node )
- {
- int n = node->sample_count, nl, nr, scount = data->sample_count;
- char* dir = (char*)data->direction->data.ptr;
- CvDTreeNode *left = 0, *right = 0;
- int* newIdx = data->split_buf->data.i;
- int newBufIdx = data->get_child_buf_idx( node );
- int workVarCount = data->get_work_var_count();
- CvMat* buf = data->buf;
- size_t length_buf_row = data->get_length_subbuf();
- // Scratch: n ints of relocation table + per-variable value/index buffers.
- cv::AutoBuffer<uchar> inn_buf(n*(3*sizeof(int)+sizeof(float)));
- int* tempBuf = (int*)inn_buf.data();
- bool splitInputData;
- complete_node_dir(node);
- for( int i = nl = nr = 0; i < n; i++ )
- {
- int d = dir[i];
- // initialize new indices for splitting ordered variables
- // Branchless select: d==0 keeps the running left count, d==1 the right.
- newIdx[i] = (nl & (d-1)) | (nr & -d); // d ? ri : li
- nr += d;
- nl += d^1;
- }
- node->left = left = data->new_node( node, nl, newBufIdx, node->offset );
- node->right = right = data->new_node( node, nr, newBufIdx, node->offset + nl );
- // Only materialize child data when further splitting is still possible.
- splitInputData = node->depth + 1 < data->params.max_depth &&
- (node->left->sample_count > data->params.min_sample_count ||
- node->right->sample_count > data->params.min_sample_count);
- // split ordered variables, keep both halves sorted.
- for( int vi = 0; vi < ((CvCascadeBoostTrainData*)data)->numPrecalcIdx; vi++ )
- {
- int ci = data->get_var_type(vi);
- // ci >= 0 means categorical; only ordered precalc-idx variables are split.
- if( ci >= 0 || !splitInputData )
- continue;
- int n1 = node->get_num_valid(vi);
- float *src_val_buf = (float*)(tempBuf + n);
- int *src_sorted_idx_buf = (int*)(src_val_buf + n);
- int *src_sample_idx_buf = src_sorted_idx_buf + n;
- const int* src_sorted_idx = 0;
- const float* src_val = 0;
- data->get_ord_var_data(node, vi, src_val_buf, src_sorted_idx_buf, &src_val, &src_sorted_idx, src_sample_idx_buf);
- // Copy sorted order out before overwriting the shared buffers below.
- for(int i = 0; i < n; i++)
- tempBuf[i] = src_sorted_idx[i];
- if (data->is_buf_16u)
- {
- ushort *ldst, *rdst;
- ldst = (ushort*)(buf->data.s + left->buf_idx*length_buf_row +
- vi*scount + left->offset);
- // Right half is stored immediately after the nl left entries.
- rdst = (ushort*)(ldst + nl);
- // split sorted
- for( int i = 0; i < n1; i++ )
- {
- int idx = tempBuf[i];
- int d = dir[idx];
- idx = newIdx[idx];
- if (d)
- {
- *rdst = (ushort)idx;
- rdst++;
- }
- else
- {
- *ldst = (ushort)idx;
- ldst++;
- }
- }
- CV_Assert( n1 == n );
- }
- else
- {
- int *ldst, *rdst;
- ldst = buf->data.i + left->buf_idx*length_buf_row +
- vi*scount + left->offset;
- rdst = buf->data.i + right->buf_idx*length_buf_row +
- vi*scount + right->offset;
- // split sorted
- for( int i = 0; i < n1; i++ )
- {
- int idx = tempBuf[i];
- int d = dir[idx];
- idx = newIdx[idx];
- if (d)
- {
- *rdst = idx;
- rdst++;
- }
- else
- {
- *ldst = idx;
- ldst++;
- }
- }
- CV_Assert( n1 == n );
- }
- }
- // split cv_labels using newIdx relocation table
- int *src_lbls_buf = tempBuf + n;
- const int* src_lbls = data->get_cv_labels(node, src_lbls_buf);
- for(int i = 0; i < n; i++)
- tempBuf[i] = src_lbls[i];
- if (data->is_buf_16u)
- {
- // cv_labels live in row (workVarCount-1) of the child sub-buffers.
- unsigned short *ldst = (unsigned short *)(buf->data.s + left->buf_idx*length_buf_row +
- (size_t)(workVarCount-1)*scount + left->offset);
- unsigned short *rdst = (unsigned short *)(buf->data.s + right->buf_idx*length_buf_row +
- (size_t)(workVarCount-1)*scount + right->offset);
- for( int i = 0; i < n; i++ )
- {
- int idx = tempBuf[i];
- if (dir[i])
- {
- *rdst = (unsigned short)idx;
- rdst++;
- }
- else
- {
- *ldst = (unsigned short)idx;
- ldst++;
- }
- }
- }
- else
- {
- int *ldst = buf->data.i + left->buf_idx*length_buf_row +
- (size_t)(workVarCount-1)*scount + left->offset;
- int *rdst = buf->data.i + right->buf_idx*length_buf_row +
- (size_t)(workVarCount-1)*scount + right->offset;
- for( int i = 0; i < n; i++ )
- {
- int idx = tempBuf[i];
- if (dir[i])
- {
- *rdst = idx;
- rdst++;
- }
- else
- {
- *ldst = idx;
- ldst++;
- }
- }
- }
- // split sample indices
- int *sampleIdx_src_buf = tempBuf + n;
- const int* sampleIdx_src = data->get_sample_indices(node, sampleIdx_src_buf);
- for(int i = 0; i < n; i++)
- tempBuf[i] = sampleIdx_src[i];
- if (data->is_buf_16u)
- {
- // Sample indices live in row workVarCount of the child sub-buffers.
- unsigned short* ldst = (unsigned short*)(buf->data.s + left->buf_idx*length_buf_row +
- (size_t)workVarCount*scount + left->offset);
- unsigned short* rdst = (unsigned short*)(buf->data.s + right->buf_idx*length_buf_row +
- (size_t)workVarCount*scount + right->offset);
- for (int i = 0; i < n; i++)
- {
- unsigned short idx = (unsigned short)tempBuf[i];
- if (dir[i])
- {
- *rdst = idx;
- rdst++;
- }
- else
- {
- *ldst = idx;
- ldst++;
- }
- }
- }
- else
- {
- int* ldst = buf->data.i + left->buf_idx*length_buf_row +
- (size_t)workVarCount*scount + left->offset;
- int* rdst = buf->data.i + right->buf_idx*length_buf_row +
- (size_t)workVarCount*scount + right->offset;
- for (int i = 0; i < n; i++)
- {
- int idx = tempBuf[i];
- if (dir[i])
- {
- *rdst = idx;
- rdst++;
- }
- else
- {
- *ldst = idx;
- ldst++;
- }
- }
- }
- // Every variable in both children sees its full sample set as valid.
- for( int vi = 0; vi < data->var_count; vi++ )
- {
- left->set_num_valid(vi, (int)(nl));
- right->set_num_valid(vi, (int)(nr));
- }
- // deallocate the parent node data that is not needed anymore
- data->free_node_data(node);
- }
- // Recursively sets featureMap[0][var_idx] = 1 for every feature used by a
- // split anywhere in the subtree rooted at `node`. Leaves (no split) stop the
- // recursion.
- static void auxMarkFeaturesInMap( const CvDTreeNode* node, Mat& featureMap)
- {
- if ( node && node->split )
- {
- featureMap.ptr<int>(0)[node->split->var_idx] = 1;
- auxMarkFeaturesInMap( node->left, featureMap );
- auxMarkFeaturesInMap( node->right, featureMap );
- }
- }
- // Marks every feature this tree uses in featureMap (see auxMarkFeaturesInMap).
- void CvCascadeBoostTree::markFeaturesInMap( Mat& featureMap )
- {
- auxMarkFeaturesInMap( root, featureMap );
- }
- //----------------------------------- CascadeBoost --------------------------------------
- // Trains one boosted stage: builds the cascade train data, then repeatedly
- // fits weak CvCascadeBoostTree learners, updating and trimming sample weights
- // after each, until the stage error target is met (isErrDesired) or the weak
- // classifier budget (params.weak_count) is exhausted.
- // Returns true iff at least one weak classifier was trained.
- bool CvCascadeBoost::train( const CvFeatureEvaluator* _featureEvaluator,
- int _numSamples,
- int _precalcValBufSize, int _precalcIdxBufSize,
- const CvCascadeBoostParams& _params )
- {
- bool isTrained = false;
- CV_Assert( !data );
- clear();
- data = new CvCascadeBoostTrainData( _featureEvaluator, _numSamples,
- _precalcValBufSize, _precalcIdxBufSize, _params );
- CvMemStorage *storage = cvCreateMemStorage();
- // The sequence takes over the storage; drop the local alias afterwards.
- weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
- storage = 0;
- set_params( _params );
- // LogitBoost / Gentle AdaBoost fit regression trees, so responses are
- // copied and later overwritten with real-valued targets.
- if ( (_params.boost_type == LOGIT) || (_params.boost_type == GENTLE) )
- data->do_responses_copy();
- // tree == 0 initializes the uniform weights and the label buffers.
- update_weights( 0 );
- cout << "+----+---------+---------+" << endl;
- cout << "| N | HR | FA |" << endl;
- cout << "+----+---------+---------+" << endl;
- do
- {
- CvCascadeBoostTree* tree = new CvCascadeBoostTree;
- if( !tree->train( data, subsample_mask, this ) )
- {
- delete tree;
- break;
- }
- cvSeqPush( weak, &tree );
- update_weights( tree );
- trim_weights();
- // All samples trimmed away -> nothing left to learn from.
- if( cvCountNonZero(subsample_mask) == 0 )
- break;
- }
- while( !isErrDesired() && (weak->total < params.weak_count) );
- if(weak->total > 0)
- {
- data->is_classifier = true;
- data->free_train_data();
- isTrained = true;
- }
- else
- clear();
- return isTrained;
- }
- // Evaluates the stage on one sample: sums the leaf values of all weak trees.
- // With returnSum==false the sum is thresholded against the stage threshold
- // (minus CV_THRESHOLD_EPS) into a 0/1 pass decision; otherwise the raw sum
- // is returned.
- float CvCascadeBoost::predict( int sampleIdx, bool returnSum ) const
- {
- CV_Assert( weak );
- double sum = 0;
- CvSeqReader reader;
- cvStartReadSeq( weak, &reader );
- cvSetSeqReaderPos( &reader, 0 );
- for( int i = 0; i < weak->total; i++ )
- {
- CvBoostTree* wtree;
- CV_READ_SEQ_ELEM( wtree, reader );
- sum += ((CvCascadeBoostTree*)wtree)->predict(sampleIdx)->value;
- }
- if( !returnSum )
- sum = sum < threshold - CV_THRESHOLD_EPS ? 0.0 : 1.0;
- return (float)sum;
- }
- // Copies the cascade-specific rate targets out of _params and validates that
- // both lie strictly inside (0, 1) before delegating to CvBoost::set_params.
- bool CvCascadeBoost::set_params( const CvBoostParams& _params )
- {
- minHitRate = ((CvCascadeBoostParams&)_params).minHitRate;
- maxFalseAlarm = ((CvCascadeBoostParams&)_params).maxFalseAlarm;
- return ( ( minHitRate > 0 ) && ( minHitRate < 1) &&
- ( maxFalseAlarm > 0 ) && ( maxFalseAlarm < 1) &&
- CvBoost::set_params( _params ));
- }
- // Updates per-sample boosting weights after training weak classifier `tree`.
- // Called with tree==0 once before the first weak learner to allocate and
- // initialize the weight/response/mask buffers. Each boost type (DISCRETE,
- // REAL, LOGIT, GENTLE) applies its own weight-update rule; weights are
- // renormalized to sum to 1 at the end. The update formulas are documented
- // inline next to each branch.
- void CvCascadeBoost::update_weights( CvBoostTree* tree )
- {
- int n = data->sample_count;
- double sumW = 0.;
- int step = 0;
- float* fdata = 0;
- int *sampleIdxBuf;
- const int* sampleIdx = 0;
- // Scratch sized for the index buffers actually needed by this call.
- int inn_buf_size = ((params.boost_type == LOGIT) || (params.boost_type == GENTLE) ? n*sizeof(int) : 0) +
- ( !tree ? n*sizeof(int) : 0 );
- cv::AutoBuffer<uchar> inn_buf(inn_buf_size);
- uchar* cur_inn_buf_pos = inn_buf.data();
- if ( (params.boost_type == LOGIT) || (params.boost_type == GENTLE) )
- {
- // Regression-style boosting writes real-valued targets into the
- // responses copy; `step` accounts for non-contiguous response storage.
- step = CV_IS_MAT_CONT(data->responses_copy->type) ?
- 1 : data->responses_copy->step / CV_ELEM_SIZE(data->responses_copy->type);
- fdata = data->responses_copy->data.fl;
- sampleIdxBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(sampleIdxBuf + n);
- sampleIdx = data->get_sample_indices( data->data_root, sampleIdxBuf );
- }
- CvMat* buf = data->buf;
- size_t length_buf_row = data->get_length_subbuf();
- if( !tree ) // before training the first tree, initialize weights and other parameters
- {
- int* classLabelsBuf = (int*)cur_inn_buf_pos; cur_inn_buf_pos = (uchar*)(classLabelsBuf + n);
- const int* classLabels = data->get_class_labels(data->data_root, classLabelsBuf);
- // in case of logitboost and gentle adaboost each weak tree is a regression tree,
- // so we need to convert class labels to floating-point values
- double w0 = 1./n;
- double p[2] = { 1, 1 };
- cvReleaseMat( &orig_response );
- cvReleaseMat( &sum_response );
- cvReleaseMat( &weak_eval );
- cvReleaseMat( &subsample_mask );
- cvReleaseMat( &weights );
- orig_response = cvCreateMat( 1, n, CV_32S );
- weak_eval = cvCreateMat( 1, n, CV_64F );
- subsample_mask = cvCreateMat( 1, n, CV_8U );
- weights = cvCreateMat( 1, n, CV_64F );
- subtree_weights = cvCreateMat( 1, n + 2, CV_64F );
- if (data->is_buf_16u)
- {
- unsigned short* labels = (unsigned short*)(buf->data.s + data->data_root->buf_idx*length_buf_row +
- data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count);
- for( int i = 0; i < n; i++ )
- {
- // save original categorical responses {0,1}, convert them to {-1,1}
- orig_response->data.i[i] = classLabels[i]*2 - 1;
- // make all the samples active at start.
- // later, in trim_weights() deactivate/reactive again some, if need
- subsample_mask->data.ptr[i] = (uchar)1;
- // make all the initial weights the same.
- weights->data.db[i] = w0*p[classLabels[i]];
- // set the labels to find (from within weak tree learning proc)
- // the particular sample weight, and where to store the response.
- labels[i] = (unsigned short)i;
- }
- }
- else
- {
- // Same initialization as above, 32-bit label buffer layout.
- int* labels = buf->data.i + data->data_root->buf_idx*length_buf_row +
- data->data_root->offset + (size_t)(data->work_var_count-1)*data->sample_count;
- for( int i = 0; i < n; i++ )
- {
- // save original categorical responses {0,1}, convert them to {-1,1}
- orig_response->data.i[i] = classLabels[i]*2 - 1;
- subsample_mask->data.ptr[i] = (uchar)1;
- weights->data.db[i] = w0*p[classLabels[i]];
- labels[i] = i;
- }
- }
- if( params.boost_type == LOGIT )
- {
- sum_response = cvCreateMat( 1, n, CV_64F );
- for( int i = 0; i < n; i++ )
- {
- sum_response->data.db[i] = 0;
- fdata[sampleIdx[i]*step] = orig_response->data.i[i] > 0 ? 2.f : -2.f;
- }
- // in case of logitboost each weak tree is a regression tree.
- // the target function values are recalculated for each of the trees
- data->is_classifier = false;
- }
- else if( params.boost_type == GENTLE )
- {
- for( int i = 0; i < n; i++ )
- fdata[sampleIdx[i]*step] = (float)orig_response->data.i[i];
- data->is_classifier = false;
- }
- }
- else
- {
- // at this moment, for all the samples that participated in the training of the most
- // recent weak classifier we know the responses. For other samples we need to compute them
- if( have_subsample )
- {
- // invert the subsample mask
- cvXorS( subsample_mask, cvScalar(1.), subsample_mask );
- // run tree through all the non-processed samples
- for( int i = 0; i < n; i++ )
- if( subsample_mask->data.ptr[i] )
- {
- weak_eval->data.db[i] = ((CvCascadeBoostTree*)tree)->predict( i )->value;
- }
- }
- // now update weights and other parameters for each type of boosting
- if( params.boost_type == DISCRETE )
- {
- // Discrete AdaBoost:
- // weak_eval[i] (=f(x_i)) is in {-1,1}
- // err = sum(w_i*(f(x_i) != y_i))/sum(w_i)
- // C = log((1-err)/err)
- // w_i *= exp(C*(f(x_i) != y_i))
- double C, err = 0.;
- double scale[] = { 1., 0. };
- for( int i = 0; i < n; i++ )
- {
- double w = weights->data.db[i];
- sumW += w;
- err += w*(weak_eval->data.db[i] != orig_response->data.i[i]);
- }
- if( sumW != 0 )
- err /= sumW;
- C = err = -logRatio( err );
- scale[1] = exp(err);
- sumW = 0;
- for( int i = 0; i < n; i++ )
- {
- // scale[0]=1 for correct samples, scale[1]=exp(C) for errors.
- double w = weights->data.db[i]*
- scale[weak_eval->data.db[i] != orig_response->data.i[i]];
- sumW += w;
- weights->data.db[i] = w;
- }
- tree->scale( C );
- }
- else if( params.boost_type == REAL )
- {
- // Real AdaBoost:
- // weak_eval[i] = f(x_i) = 0.5*log(p(x_i)/(1-p(x_i))), p(x_i)=P(y=1|x_i)
- // w_i *= exp(-y_i*f(x_i))
- for( int i = 0; i < n; i++ )
- weak_eval->data.db[i] *= -orig_response->data.i[i];
- cvExp( weak_eval, weak_eval );
- for( int i = 0; i < n; i++ )
- {
- double w = weights->data.db[i]*weak_eval->data.db[i];
- sumW += w;
- weights->data.db[i] = w;
- }
- }
- else if( params.boost_type == LOGIT )
- {
- // LogitBoost:
- // weak_eval[i] = f(x_i) in [-z_max,z_max]
- // sum_response = F(x_i).
- // F(x_i) += 0.5*f(x_i)
- // p(x_i) = exp(F(x_i))/(exp(F(x_i)) + exp(-F(x_i))=1/(1+exp(-2*F(x_i)))
- // reuse weak_eval: weak_eval[i] <- p(x_i)
- // w_i = p(x_i)*1(1 - p(x_i))
- // z_i = ((y_i+1)/2 - p(x_i))/(p(x_i)*(1 - p(x_i)))
- // store z_i to the data->data_root as the new target responses
- const double lbWeightThresh = FLT_EPSILON;
- const double lbZMax = 10.;
- for( int i = 0; i < n; i++ )
- {
- double s = sum_response->data.db[i] + 0.5*weak_eval->data.db[i];
- sum_response->data.db[i] = s;
- weak_eval->data.db[i] = -2*s;
- }
- cvExp( weak_eval, weak_eval );
- for( int i = 0; i < n; i++ )
- {
- double p = 1./(1. + weak_eval->data.db[i]);
- double w = p*(1 - p), z;
- // Floor the weight to keep z_i finite below.
- w = MAX( w, lbWeightThresh );
- weights->data.db[i] = w;
- sumW += w;
- if( orig_response->data.i[i] > 0 )
- {
- z = 1./p;
- fdata[sampleIdx[i]*step] = (float)min(z, lbZMax);
- }
- else
- {
- z = 1./(1-p);
- fdata[sampleIdx[i]*step] = (float)-min(z, lbZMax);
- }
- }
- }
- else
- {
- // Gentle AdaBoost:
- // weak_eval[i] = f(x_i) in [-1,1]
- // w_i *= exp(-y_i*f(x_i))
- assert( params.boost_type == GENTLE );
- for( int i = 0; i < n; i++ )
- weak_eval->data.db[i] *= -orig_response->data.i[i];
- cvExp( weak_eval, weak_eval );
- for( int i = 0; i < n; i++ )
- {
- double w = weights->data.db[i] * weak_eval->data.db[i];
- weights->data.db[i] = w;
- sumW += w;
- }
- }
- }
- // renormalize weights
- if( sumW > FLT_EPSILON )
- {
- sumW = 1./sumW;
- for( int i = 0; i < n; ++i )
- weights->data.db[i] *= sumW;
- }
- }
- // Chooses the stage threshold so the hit rate on positives is >= minHitRate,
- // then measures the false-alarm rate on negatives at that threshold and
- // prints one table row. Returns true when falseAlarm <= maxFalseAlarm,
- // i.e. the stage may stop adding weak classifiers.
- // NOTE(review): numPos == 0 would index eval[0] of unsorted data and divide
- // by zero, and numNeg == 0 divides by zero — callers presumably guarantee
- // both classes are present; confirm against the training-set loader.
- bool CvCascadeBoost::isErrDesired()
- {
- int sCount = data->sample_count,
- numPos = 0, numNeg = 0, numFalse = 0, numPosTrue = 0;
- vector<float> eval(sCount);
- // Collect raw stage sums for all positive samples.
- for( int i = 0; i < sCount; i++ )
- if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 1.0F )
- eval[numPos++] = predict( i, true );
- std::sort(&eval[0], &eval[0] + numPos);
- // Threshold at the (1 - minHitRate) quantile of positive responses.
- int thresholdIdx = (int)((1.0F - minHitRate) * numPos);
- threshold = eval[ thresholdIdx ];
- numPosTrue = numPos - thresholdIdx;
- // Samples just below the cut that tie with the threshold still pass.
- for( int i = thresholdIdx - 1; i >= 0; i--)
- if ( abs( eval[i] - threshold) < FLT_EPSILON )
- numPosTrue++;
- float hitRate = ((float) numPosTrue) / ((float) numPos);
- for( int i = 0; i < sCount; i++ )
- {
- if( ((CvCascadeBoostTrainData*)data)->featureEvaluator->getCls( i ) == 0.0F )
- {
- numNeg++;
- if( predict( i ) )
- numFalse++;
- }
- }
- float falseAlarm = ((float) numFalse) / ((float) numNeg);
- cout << "|"; cout.width(4); cout << right << weak->total;
- cout << "|"; cout.width(9); cout << right << hitRate;
- cout << "|"; cout.width(9); cout << right << falseAlarm;
- cout << "|" << endl;
- cout << "+----+---------+---------+" << endl;
- return falseAlarm <= maxFalseAlarm;
- }
- // Serializes the stage: weak-classifier count, stage threshold, then each
- // weak tree in order (see CvCascadeBoostTree::write). featureMap remaps
- // feature indices for trees when non-empty.
- void CvCascadeBoost::write( FileStorage &fs, const Mat& featureMap ) const
- {
- // char cmnt[30];
- CvCascadeBoostTree* weakTree;
- fs << CC_WEAK_COUNT << weak->total;
- fs << CC_STAGE_THRESHOLD << threshold;
- fs << CC_WEAK_CLASSIFIERS << "[";
- for( int wi = 0; wi < weak->total; wi++)
- {
- /*snprintf( cmnt, sizeof(cmnt), "tree %i", wi );
- cvWriteComment( fs, cmnt, 0 );*/
- weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
- weakTree->write( fs, featureMap );
- }
- fs << "]";
- }
- // Deserializes a stage written by CvCascadeBoost::write: rebuilds the train
- // data wrapper around the feature evaluator, restores the stage threshold,
- // and reads each weak tree into the `weak` sequence. Always returns true.
- bool CvCascadeBoost::read( const FileNode &node,
- const CvFeatureEvaluator* _featureEvaluator,
- const CvCascadeBoostParams& _params )
- {
- CvMemStorage* storage;
- clear();
- data = new CvCascadeBoostTrainData( _featureEvaluator, _params );
- set_params( _params );
- node[CC_STAGE_THRESHOLD] >> threshold;
- FileNode rnode = node[CC_WEAK_CLASSIFIERS];
- // The sequence owns the storage for the weak-tree pointers.
- storage = cvCreateMemStorage();
- weak = cvCreateSeq( 0, sizeof(CvSeq), sizeof(CvBoostTree*), storage );
- for( FileNodeIterator it = rnode.begin(); it != rnode.end(); it++ )
- {
- CvCascadeBoostTree* tree = new CvCascadeBoostTree();
- tree->read( *it, this, data );
- cvSeqPush( weak, &tree );
- }
- return true;
- }
- // Marks, across all weak trees of this stage, every feature used by a split
- // (delegates per tree to CvCascadeBoostTree::markFeaturesInMap).
- void CvCascadeBoost::markUsedFeaturesInMap( Mat& featureMap )
- {
- for( int wi = 0; wi < weak->total; wi++ )
- {
- CvCascadeBoostTree* weakTree = *((CvCascadeBoostTree**) cvGetSeqElem( weak, wi ));
- weakTree->markFeaturesInMap( featureMap );
- }
- }
|