20 likes | 96 Views
for( i = start_stage ; i < cascade->count; i ++ ) { double stage_sum = 0; for( j = 0; j < cascade-> stage_classifier [ i ].count; j++ ) { int idx = 0; do {
END
for( i = start_stage; i < cascade->count; i++ ) { double stage_sum = 0; for( j = 0; j < cascade->stage_classifier[i].count; j++ ) { int idx = 0; do { CvHidHaarTreeNode* node = (cascade->stage_classifier[i].classifier + j)->node + idx; double t = node->threshold * variance_norm_factor; double sum = calc_sum(node->feature.rect[0],p_offset) * node->feature.rect[0].weight; sum += calc_sum(node->feature.rect[1],p_offset) * node->feature.rect[1].weight; if ( node->feature.rect[2].p0 ) { sum += calc_sum(node->feature.rect[2],p_offset) * node->feature.rect[2].weight; } idx = sum < t ? node->left : node->right; } while( idx > 0 ); stage_sum+= (cascade->stage_classifier[i].classifier + j)->alpha[-idx]; } if (stage_sum < cascade->stage_classifier[i].threshold) { result = -i; EXIT; } } #define calc_sum(rect,offset) \ ((rect).p0[offset] - (rect).p1[offset] - (rect).p2[offset] + (rect).p3[offset]) 54 load/store 29 add/sub/mul
Summary • The ratio of memory to non-memory operations is too high: 54 load/store operations versus 29 add/sub/mul operations • Irregular memory access pattern • Inter- and intra-iteration chains of data dependencies