// "buckytree" (c)2007 Bryan McNett. All Rights Reserved. // bmcnett at gmail dot com #include #include #include #include #include #define DIMENSIONS 3 #define DIRECTIONS (DIMENSIONS*2) typedef __m64 SIMD_VECTOR_TYPE; typedef signed char SIMD_SCALAR_TYPE; #define SIMD_VECTOR_BYTES (sizeof(SIMD_VECTOR_TYPE)) #define SIMD_SCALAR_BYTES (sizeof(SIMD_SCALAR_TYPE)) #define SIMD_SCALARS (SIMD_VECTOR_BYTES/SIMD_SCALAR_BYTES) #define SIMD_MASK (~(SIMD_SCALARS-1)) #define SPHERES (SIMD_SCALARS*500000) #define ITERATIONS 500000 struct vector { float v[DIMENSIONS]; }; struct sphere { vector center; float radius; }; struct bounding { SIMD_VECTOR_TYPE plane[DIRECTIONS]; }; // for 3D, this is [ x, -x, y, -y, z, -z ] vector axis[DIRECTIONS] = { { {1,0,0} }, { {-1,0,0} }, { {0,1,0} }, { {0,-1,0} }, { {0,0,1} }, { {0,0,-1} }, }; // this algorithm works with any kind of object, but spheres are the // simplest for the sake of explanation. this program generates a six-byte // index for each sixteen-byte sphere. this index is substantial by comparison // to a sphere, but tiny by comparison to a triangle mesh, etc. sphere database[SPHERES]; unsigned result[SPHERES]; float dot( const vector& a, const vector& b ) { float result = a.v[0] * b.v[0]; for( int i=1; i( begin->plane + i ); dest[j] = (signed char)floorf( dot( axis[i], src->center ) - src->radius ); } ++src; } ++begin; } } // compute quantized maximum values for all axes used in search // void bound_maximum( bounding* begin, sphere* src ) { for( int j=0; j( &begin->plane[i] ); dest[j] = (signed char)ceilf( dot( axis[i], src->center ) + src->radius ); } } } long aabb_tests = 0; long sphere_tests = 0; unsigned* buckysearch_results; bounding buckysearch_query; bounding index[SPHERES/SIMD_SCALARS]; #define RESTRICT // search array of bounding shapes with O(logN)? trivial rejection. // put shapes which passed test into array for expensive processing later. // template< int DIRECTION > void buckysearch( bounding* RESTRICT begin, bounding* RESTRICT end ) { int boundings = end - begin; if( boundings < 1 ) return; bounding* RESTRICT median = begin + boundings / 2; buckysearch<(DIRECTION+1)%DIRECTIONS>( begin, median ); // can't reject near side of plane { SIMD_VECTOR_TYPE temp[DIRECTIONS]; // test all eight boxes in parallel, but only for the plane that can do trivial rejection temp[DIRECTION] = _mm_cmpgt_pi8( median->plane[DIRECTION], buckysearch_query.plane[DIRECTION] ); int mask = _mm_movemask_pi8( temp[DIRECTION] ); if( mask & 1 ) return; // test all eight boxes in parallel, for the planes that don't do trivial rejection if( DIRECTION != 0 ) temp[0] = _mm_cmpgt_pi8( median->plane[0], buckysearch_query.plane[0] ); if( DIRECTION != 1 ) temp[1] = _mm_cmpgt_pi8( median->plane[1], buckysearch_query.plane[1] ); if( DIRECTION != 2 ) temp[2] = _mm_cmpgt_pi8( median->plane[2], buckysearch_query.plane[2] ); if( DIRECTION != 3 ) temp[3] = _mm_cmpgt_pi8( median->plane[3], buckysearch_query.plane[3] ); if( DIRECTION != 4 ) temp[4] = _mm_cmpgt_pi8( median->plane[4], buckysearch_query.plane[4] ); if( DIRECTION != 5 ) temp[5] = _mm_cmpgt_pi8( median->plane[5], buckysearch_query.plane[5] ); // or the results of the tests - a 1 bit for any plane means no intersection SIMD_VECTOR_TYPE a = _mm_or_si64( temp[0], temp[1] ); SIMD_VECTOR_TYPE b = _mm_or_si64( temp[2], temp[3] ); SIMD_VECTOR_TYPE c = _mm_or_si64( temp[4], temp[5] ); mask = _mm_movemask_pi8( _mm_or_si64( _mm_or_si64( a, b ), c ) ); // if not all eight boxes failed a test, write the winners out for expensive testing later. if( mask != 0xff ) { const unsigned offset = median - index; for( unsigned i=0; i( median+1, end ); } int main( int argc, char* argv[] ) { srand( 0xCAFEBABE ); // generate a lot of spheres randomly for( int i=0; i( index, index+SPHERES/SIMD_SCALARS ); _mm_empty(); // now for each box which intersected, do a costly object-object intersection. for( unsigned* begin = result; begin < buckysearch_results; ++begin ) if( intersects( query_sphere, database[*begin] ) ) ++sphere_tests; } time_t now = time( NULL ); double seconds = now - then; double spheres = SPHERES; spheres *= ITERATIONS; { std::ofstream out( "c:\\orly.txt" ); out << "mobile athlon XP 1600+" << std::endl; out << "objects: " << spheres << std::endl; out << "seconds: " << seconds << std::endl; out << "seconds / objects: " << seconds / spheres << std::endl; out << "aabb tests: " << aabb_tests << std::endl; out << "aabb tests / objects: " << aabb_tests / spheres << std::endl; out << "object tests: " << sphere_tests << std::endl; out << "object tests / objects: " << sphere_tests / spheres << std::endl; out << std::endl; out.close(); } }