include/crbn/basic/vec3_sse.hpp

00001 
00002 #ifndef __vec3d_sse_hpp__
00003 #define __vec3d_sse_hpp__
00004 
00005 #include <crbn/basic/scalar.hpp>
00006 
00007 #include <xmmintrin.h>
00008 
// Three-component float vector stored in one 16-byte SSE register.
//
// Lane layout of _vector, lowest lane first: w, x, y, z.  The w lane is
// padding, but it still takes part in every whole-register _mm_*_ps
// operation, so every constructor gives it a defined value.
struct vec3_sse {
  // The __m128 member forces 16-byte alignment of the whole object.
  union {
    __m128 _vector;
    struct { float w, x, y, z; };
  };

  // Zero vector (padding lane is zero as well).
  vec3_sse() : _vector( _mm_setzero_ps() ) {}
  // x = y = z = a, padding lane zero.
  // _mm_set_ps takes its arguments highest lane first: z, y, x, w.
  vec3_sse( const float& a ) : _vector( _mm_set_ps( a, a, a, 0.f ) ) {}
  // (x, y, z) = (a, b, c), padding lane zero.
  vec3_sse( const float& a, const float& b, const float& c )
    : _vector( _mm_set_ps( c, b, a, 0.f ) ) {}
  // Copies the whole register of v, padding lane included.
  vec3_sse( const vec3_sse& v ) : _vector( v._vector ) {}
};

typedef vec3_sse __attribute__ ((aligned (16))) vec3;
00023 
00024 // a = b + c
00025 static inline void
00026 vadd( vec3& a, const vec3& b, const vec3& c ) {
00027   a.x = b.x + c.x;
00028   a.y = b.y + c.y;
00029   a.z = b.z + c.z;
00030   //a._vector = _mm_add_ps( b._vector, c._vector );
00031 }
00032 // a += b
00033 static inline void
00034 vadd( vec3& a, const vec3& b ) {
00035   a.x += b.x;
00036   a.y += b.y;
00037   a.z += b.z;
00038   //a._vector = _mm_add_ps( a._vector, b._vector );
00039 }
00040 // a = b - c
00041 static inline void
00042 vsub( vec3& a, const vec3& b, const vec3& c ){
00043   a.x = b.x - c.x;
00044   a.y = b.y - c.y;
00045   a.z = b.z - c.z;
00046   //a._vector = _mm_sub_ps( b._vector, c._vector );
00047 } 
00048 // a -= b
00049 static inline void
00050 vsub( vec3& a, const vec3& b ) {
00051   a.x -= b.x;
00052   a.y -= b.y;
00053   a.z -= b.z;
00054   //a._vector = _mm_sub_ps( a._vector, b._vector );
00055 }
00056 // a = -b
00057 static inline void
00058 vneg( vec3& a, const vec3& b ) {
00059   a.x = - b.x;
00060   a.y = - b.y;
00061   a.z = - b.z;
00062   //vec3 tmp( 0.f );
00063   //a._vector = _mm_sub_ps( tmp._vector, b._vector );
00064 }
00065 // a = -a
00066 static inline void
00067 vneg( vec3& a ) {
00068   a.x = - a.x;
00069   a.y = - a.y;
00070   a.z = - a.z;
00071   vec3 tmp( 0.f );
00072   a._vector = _mm_sub_ps( tmp._vector, a._vector );
00073 }
00074 
00075 //  a = f * b
00076 static inline void
00077 vmul( vec3& a, const float f, const vec3& b ) {
00078   a.x = f * b.x;
00079   a.y = f * b.y;
00080   a.z = f * b.z;  
00081 //   vec3 tmp( f );
00082 //   a._vector = _mm_mul_ps( tmp._vector, b._vector );
00083 }
00084 //  a *= f
00085 static inline void
00086 vmul( vec3& a, const float& f ) {
00087   a.x *= f;
00088   a.y *= f;
00089   a.z *= f;  
00090   //vec3 tmp( f );
00091   //a._vector = _mm_mul_ps( tmp._vector, a._vector );
00092 }
00093 //  a = b / f
00094 static inline void
00095 vdiv( vec3& a, const vec3& b, const float f ) {
00096   f = 1.f / f;
00097   a.x = f * b.x;
00098   a.y = f * b.y;
00099   a.z = f * b.z;  
00100 //   vec3 tmp( 1.f / f );
00101 //   a._vector = _mm_mul_ps( b._vector, tmp._vector );
00102 }
00103 //  a /= f
00104 static inline void
00105 vdiv( vec3& a, const float f ) {
00106   f = 1.f / f;
00107   a.x *= f;
00108   a.y *= f;
00109   a.z *= f;  
00110 //   vec3 tmp( 1.f / f );
00111 //   a._vector = _mm_mul_ps( a._vector, tmp._vector );
00112 }
00113 
00114 // a_i = b_i * c_i for each component
00115 static inline void
00116 vmul( vec3& a, const vec3& b, const vec3& c ) {
00117   a.x = b.x * c.x;
00118   a.y = b.y * c.y;
00119   a.z = b.z * c.z;
00120   //a._vector = _mm_mul_ps( b._vector, c._vector );
00121 }
00122 // a_i *= b_i for each component
00123 static inline void
00124 vmul( vec3& a, vec3& b ) {
00125   a.x *= b.x;
00126   a.y *= b.y;
00127   a.z *= b.z;
00128   //a._vector = _mm_mul_ps( a._vector, b._vector );
00129 }
00130 
00131 // a_i = b_i / c_i for each component
00132 static inline void
00133 vdiv( vec3& a, const vec3& b, const vec3& c ) {
00134   a.x = b.x / c.x;
00135   a.y = b.y / c.y;
00136   a.z = b.z / c.z;
00137   //a._vector = _mm_div_ps( b._vector, c._vector );
00138 }
00139 // a_i /= b_i for each component
00140 static inline void
00141 vdiv( vec3& a, const vec3& b ) {
00142   a.x /= b.x;
00143   a.y /= b.y;
00144   a.z /= b.z;
00145   //a._vector = _mm_div_ps( a._vector, b._vector );
00146 }
00147 
00148 // a = b + f * c
00149 static inline void
00150 vaddfmul( vec3& a, const vec3& b, const float& f, const vec3& c ) {
00151   vec3 tmp( f );
00152   a._vector = _mm_mul_ps( tmp._vector, c._vector );
00153   a._vector = _mm_add_ps( a._vector, b._vector );
00154 }
00155 
00156 // a += f * b
00157 static inline void
00158 vaddfmul( vec3& a, const float& f, const vec3& b ) {
00159   vec3 tmp( f );
00160   tmp._vector = _mm_mul_ps( tmp._vector, b._vector );
00161   a._vector = _mm_add_ps( a._vector, tmp._vector );
00162 }
00163 
00164 // a = b + c * d
00165 static inline void
00166 vaddmul( vec3& a, const vec3& b, const vec3& c, const vec3& d ) {
00167   a._vector = _mm_mul_ps( c._vector, d._vector );
00168   a._vector = _mm_add_ps( a._vector, b._vector );
00169 }
00170 
00171 // a += b * c
00172 static inline void
00173 vaddmul( vec3& a, const vec3& b, const vec3& c ) {
00174   vec3 tmp;
00175   tmp._vector = _mm_mul_ps( b._vector, c._vector );
00176   a._vector = _mm_add_ps( a._vector, tmp._vector );
00177 }
00178 
00179 // a = normalize( b )
00180 static inline void
00181 vnormalize( vec3& a, const vec3& b ) {
00182   vec3 tmp;
00183   a._vector = _mm_mul_ps( b._vector, b._vector );
00184   tmp._vector = _mm_shuffle_ps( a._vector, a._vector, 0x31 );
00185   a._vector = _mm_add_ps( a._vector, tmp._vector );
00186   tmp._vector = _mm_shuffle_ps( a._vector, a._vector, 0x2 );
00187   a._vector = _mm_add_ps( a._vector, tmp._vector );
00188   a._vector = _mm_rsqrt_ss( a._vector );
00189   a._vector = _mm_shuffle_ps( a._vector, a._vector, 0x0 );
00190   a._vector = _mm_mul_ps( a._vector, b._vector );
00191 }
00192 // a = normalize( a )
00193 static inline void
00194 vnormalize( vec3& a ) {
00195   vec3 tmp, v;
00196   b._vector = _mm_mul_ps( a._vector, a._vector );
00197   tmp._vector = _mm_shuffle_ps( b._vector, b._vector, 0x31 );
00198   b._vector = _mm_add_ps( b._vector, tmp._vector );
00199   tmp._vector = _mm_shuffle_ps( b._vector, b._vector, 0x2 );
00200   b._vector = _mm_add_ps( b._vector, tmp._vector );
00201   b._vector = _mm_rsqrt_ss( b._vector );
00202   b._vector = _mm_shuffle_ps( b._vector, b._vector, 0x0 );
00203   a._vector = _mm_mul_ps( a._vector, b._vector );
00204 }
00205 // ret = norm( a )
00206 static inline float
00207 vlength( const vec3& a ) {
00208   vec3 tmp, b;
00209   float f;
00210   b._vector = _mm_mul_ps( a._vector, a._vector );
00211   tmp._vector = _mm_shuffle_ps( b._vector, b._vector, 0x31 );
00212   b._vector = _mm_add_ps( b._vector, tmp._vector );
00213   tmp._vector = _mm_shuffle_ps( b._vector, b._vector, 0x2 );
00214   b._vector = _mm_add_ps( b._vector, tmp._vector );
00215   b._vector = _mm_rsqrt_ss( b._vector );
00216   _mm_store_ss( &f, b._vector );
00217   return f;
00218 }
00219 
00220 // ret = norm( a ) ^ 2
00221 static inline float
00222 vlength2( const vec3& a ) {
00223   vec3 tmp, b;
00224   float f;
00225   b._vector = _mm_mul_ps( a._vector, a._vector );
00226   tmp._vector = _mm_shuffle_ps( b._vector, b._vector, 0x31 );
00227   b._vector = _mm_add_ps( b._vector, tmp._vector );
00228   tmp._vector = _mm_shuffle_ps( b._vector, b._vector, 0x2 );
00229   b._vector = _mm_add_ps( b._vector, tmp._vector );
00230   _mm_store_ss( &f, b._vector );
00231   return f;
00232 }
00233 
00234 // ret = distance( a, b )
00235 static inline float
00236 vdistance( const vec3& a, const vec3& b ) {
00237   vec3 tmp;
00238   tmp._vector = _mm_sub_ps( a._vector, b._vector );
00239   return vlength( tmp );
00240 }
00241 // ret = distance( a, b ) ^ 2
00242 static inline float
00243 vdistance2( const vec3& a, const vec3& b ) {
00244   vec3 tmp;
00245   tmp._vector = _mm_sub_ps( a._vector, b._vector );
00246   return vlength2( tmp );
00247 }
00248 
00249 // ret = a . b
00250 static inline float
00251 vdot( const vec3& a, const vec3& b ) {
00252   /*
00253     vec3 tmp, tmp2;
00254     float f;
00255     tmp._vector = _mm_mul_ps( a._vector, b._vector );
00256     tmp2._vector = _mm_shuffle_ps( tmp._vector, tmp._vector, 0x31 );
00257     tmp._vector = _mm_add_ps( tmp._vector, tmp._vector );
00258     tmp2._vector = _mm_shuffle_ps( tmp._vector, tmp._vector, 0x2 );
00259     _mm_store_ss( &f, b._vector );
00260     return f;
00261   */
00262   return a.x * b.x + a.y * b.y + a.z * b.z;
00263 }
00264 // a = b ^ c
00265 static inline void
00266 vcross( vec3& a, const vec3& b, const vec3& c ) {
00267   a.x = b.y * c.z - b.z * c.y;
00268   a.y = b.z * c.x - b.x * c.z;
00269   a.z = b.x * c.y - b.y * c.x;
00270 }
00271 
00272 // v_i = min( a_i, b_i ) for each component
00273 static inline void
00274 vmin( vec3& v, const vec3& a, const vec3& b ) {
00275   v.x = min( a.x, b.x );
00276   v.y = min( a.y, b.y );
00277   v.z = min( a.z, b.z );
00278 }
00279 // v_i = max( a_i, b_i ) for each component
00280 static inline void
00281 vmax( vec3& v, const vec3& a, const vec3& b ) {
00282   v.x = max( a.x, b.x );
00283   v.y = max( a.y, b.y );
00284   v.z = max( a.z, b.z );
00285 }
00286 
00287 // compute coordinate system from a vector
00288 static inline void
00289 coordinate_system(vec3& v1, vec3& v2, vec3& v3) {
00290   float inv_length;
00291   if(fabs(v1.x) > fabs(v1.y)) {
00292     inv_length = 1.0f / sqrtf((v1.x * v1.x) + (v1.z * v1.z));
00293     v2 = vec3(-v1.z * inv_length, 0.0f, v1.x * inv_length);
00294   }
00295   else {
00296     inv_length = 1.0f / sqrtf((v1.y * v1.y) + (v1.z * v1.z));
00297     v2 = vec3(0.0f, v1.z * inv_length, -v1.y * inv_length);
00298   }
00299   vcross(v3, v1, v2);
00300 }
00301 
00302 #endif // __vec3d_sse_hpp__

Generated on Tue Nov 14 15:40:08 2006 for libcrbn by  doxygen 1.5.0