ISIS Core Library 0.7.2 (api 3.0.0)

/scr/tee1/isis/lib/Core/DataStorage/valuearray.cpp

Go to the documentation of this file.
00001 #include "valuearray.hpp"
00002 
00003 namespace isis
00004 {
00005 namespace data
00006 {
00008 // specialisation for complex - there shall be no scaling - and we cannot compute minmax
00009 template<> scaling_pair ValueArray<std::complex<float> >::getScalingTo( unsigned short /*typeID*/, autoscaleOption /*scaleopt*/ )const
00010 {
00011     return scaling_pair( util::Value<float>( 1 ), util::Value<float>( 0 ) );
00012 }
00013 template<> scaling_pair ValueArray<std::complex<double> >::getScalingTo( unsigned short /*typeID*/, autoscaleOption /*scaleopt*/ )const
00014 {
00015     return scaling_pair( util::Value<double>( 1 ), util::Value<double>( 0 ) );
00016 }
00017 
00018 #ifdef __SSE2__
00019 
00020 #include <emmintrin.h>
00021 namespace _internal
00022 {
00023 
00024 API_EXCLUDE_BEGIN
00025 
00027 // some voodoo to get the vector types into the templates /
00029 template<typename T> struct _VectorUnion {
00030     union {__m128i reg; T elem[16 / sizeof( T )];} vec;
00031     _VectorUnion( const T *el ) {std::copy( el, el + 16 / sizeof( T ), vec.elem );}
00032     _VectorUnion( __m128i _reg ) {vec.reg = _reg;}
00033     _VectorUnion() {}
00034 };
00035 template<typename T> struct _TypeVector;
00036 
00037 // mapping for signed types
00038 #define DEF_VECTOR_SI(TYPE,KEY)                              \
00039     template<> struct _TypeVector<TYPE>{                         \
00040         static __m128i gt(__m128i a,__m128i b){return _mm_cmpgt_epi ## KEY (a, b);}                                                        \
00041         static __m128i lt(__m128i a,__m128i b){return _mm_cmplt_epi ## KEY (a, b);}                                                        \
00042     }
00043 DEF_VECTOR_SI( int8_t, 8 );
00044 DEF_VECTOR_SI( int16_t, 16 );
00045 DEF_VECTOR_SI( int32_t, 32 );
00046 
00047 // value offset to compare unsigned int as signed (there is no compare for unsigned in SSE2)
00048 template<typename T> __m128i _getAddV()
00049 {
00050     _VectorUnion<T> ret;
00051     const T filler = 1 << ( sizeof( T ) * 8 - 1 );
00052     std::fill( ret.vec.elem, ret.vec.elem + 16 / sizeof( T ), filler );
00053     return ret.vec.reg;
00054 }
00055 
00056 // mapping for unsigned types
00057 #define DEF_VECTOR_UI(TYPE,KEY)                                 \
00058     template<> struct _TypeVector<TYPE>{                            \
00059         static __m128i addv;                                        \
00060         static inline __m128i gt(__m128i a,__m128i b){return _mm_cmpgt_epi ## KEY (_mm_add_epi ## KEY(a,addv), _mm_add_epi ## KEY(b,addv));} \
00061         static inline __m128i lt(__m128i a,__m128i b){return _mm_cmplt_epi ## KEY (_mm_add_epi ## KEY(a,addv), _mm_add_epi ## KEY(b,addv));} \
00062     };\
00063     __m128i _TypeVector<TYPE>::addv=_getAddV<TYPE>()
00064 
00065 DEF_VECTOR_UI( uint8_t, 8 );
00066 DEF_VECTOR_UI( uint16_t, 16 );
00067 DEF_VECTOR_UI( uint32_t, 32 );
00068 
00069 
00070 
00072 // optimized min/max function for integers /
00074 
00075 // generic fallback using cmpgt and some bitmask voodoo
00076 template<typename T> std::pair<__m128i, __m128i> _getMinMaxBlockLoop( const __m128i *data, size_t blocks )
00077 {
00078     std::pair<__m128i, __m128i> ret( _mm_loadu_si128( data ), _mm_loadu_si128( data ) );
00079     LOG( Runtime, verbose_info ) << "using optimized min/max computation for " << util::Value<T>::staticName() << " (masked mode)";
00080     static const __m128i one = _mm_set_epi16( -1, -1, -1, -1, -1, -1, -1, -1 );
00081 
00082     while ( --blocks ) {
00083         const __m128i at = _mm_loadu_si128( ++data );
00084 
00085         const __m128i less_mask = _TypeVector<T>::lt( at, ret.first );
00086         const __m128i greater_mask = _TypeVector<T>::gt( at, ret.second );
00087 
00088         ret.first =
00089             ( ret.first & ( less_mask ^ one ) ) //remove bigger values from current min
00090             | ( at & less_mask ); //put in the lesser values from at
00091         ret.second =
00092             ( ret.second & ( greater_mask ^ one ) ) //remove lesser values from current max
00093             | ( at & greater_mask ); //put in the bigger values from at
00094     }
00095 
00096     return ret;
00097 }
00098 
00099 // specialiced versions using processor opcodes for min/max
00100 template<> std::pair<__m128i, __m128i> _getMinMaxBlockLoop<uint8_t>( const __m128i *data, size_t blocks ) //PMAXUB
00101 {
00102     std::pair<__m128i, __m128i> ret( _mm_loadu_si128( data ), _mm_loadu_si128( data ) );
00103     LOG( Runtime, verbose_info ) << "using optimized min/max computation for " << util::Value<uint8_t>::staticName() << " (direct mode)";
00104 
00105     while ( --blocks ) {
00106         const __m128i at = _mm_loadu_si128( ++data );
00107         ret.first = _mm_min_epu8( ret.first, at );
00108         ret.second = _mm_max_epu8( ret.second, at );
00109     }
00110 
00111     return ret;
00112 }
00113 template<> std::pair<__m128i, __m128i> _getMinMaxBlockLoop<int16_t>( const __m128i *data, size_t blocks ) //PMAXSW
00114 {
00115     std::pair<__m128i, __m128i> ret( _mm_loadu_si128( data ), _mm_loadu_si128( data ) );
00116     LOG( Runtime, verbose_info ) << "using optimized min/max computation for " << util::Value<int16_t>::staticName() << " (direct mode)";
00117 
00118     while ( --blocks ) {
00119         const __m128i at = _mm_loadu_si128( ++data );
00120         ret.first = _mm_min_epi16( ret.first, at );
00121         ret.second = _mm_max_epi16( ret.second, at );
00122     }
00123 
00124     return ret;
00125 }
00126 
00127 #ifdef __SSE4_1__
00128 #include <smmintrin.h>
00129 template<> std::pair<__m128i, __m128i> _getMinMaxBlockLoop<int8_t>( const __m128i *data, size_t blocks ) //PMAXSB
00130 {
00131     std::pair<__m128i, __m128i> ret( _mm_loadu_si128( data ), _mm_loadu_si128( data ) );
00132     LOG( Runtime, verbose_info ) << "using optimized min/max computation for " << util::Value<int8_t>::staticName() << " (direct mode)";
00133 
00134     while ( --blocks ) {
00135         const __m128i at = _mm_loadu_si128( ++data );
00136         ret.first = _mm_min_epi8( ret.first, at );
00137         ret.second = _mm_max_epi8( ret.second, at );
00138     }
00139 
00140     return ret;
00141 }
00142 template<> std::pair<__m128i, __m128i> _getMinMaxBlockLoop<uint16_t>( const __m128i *data, size_t blocks ) //PMAXUW
00143 {
00144     std::pair<__m128i, __m128i> ret( _mm_loadu_si128( data ), _mm_loadu_si128( data ) );
00145     LOG( Runtime, verbose_info ) << "using optimized min/max computation for " << util::Value<uint16_t>::staticName() << " (direct mode)";
00146 
00147     while ( --blocks ) {
00148         const __m128i at = _mm_loadu_si128( ++data );
00149         ret.first = _mm_min_epu16( ret.first, at );
00150         ret.second = _mm_max_epu16( ret.second, at );
00151     }
00152 
00153     return ret;
00154 }
00155 template<> std::pair<__m128i, __m128i> _getMinMaxBlockLoop<int32_t>( const __m128i *data, size_t blocks ) //PMAXSD
00156 {
00157     std::pair<__m128i, __m128i> ret( _mm_loadu_si128( data ), _mm_loadu_si128( data ) );
00158     LOG( Runtime, verbose_info ) << "using optimized min/max computation for " << util::Value<int32_t>::staticName() << " (direct mode)";
00159 
00160     while ( --blocks ) {
00161         const __m128i at = _mm_loadu_si128( ++data );
00162         ret.first = _mm_min_epi32( ret.first, at );
00163         ret.second = _mm_max_epi32( ret.second, at );
00164     }
00165 
00166     return ret;
00167 }
00168 template<> std::pair<__m128i, __m128i> _getMinMaxBlockLoop<uint32_t>( const __m128i *data, size_t blocks ) //PMAXSD
00169 {
00170     std::pair<__m128i, __m128i> ret( _mm_loadu_si128( data ), _mm_loadu_si128( data ) );
00171     LOG( Runtime, verbose_info ) << "using optimized min/max computation for " << util::Value<uint32_t>::staticName() << " (direct mode)";
00172 
00173     while ( --blocks ) {
00174         const __m128i at = _mm_loadu_si128( ++data );
00175         ret.first = _mm_min_epu32( ret.first, at );
00176         ret.second = _mm_max_epu32( ret.second, at );
00177     }
00178 
00179     return ret;
00180 }
00181 #endif
00182 
00183 template<typename T> std::pair<T, T> _getMinMax( const T *data, size_t len )
00184 {
00185     size_t blocks = len / ( 16 / sizeof( T ) );
00186 
00187     T bmin = std::numeric_limits<T>::max();
00188     T bmax = std::numeric_limits<T>::min();
00189 
00190     if( blocks ) { // if there are 16byte-blocks of values
00191         std::pair<__m128i, __m128i> minmax = _getMinMaxBlockLoop<T>( reinterpret_cast<const __m128i *>( data ), blocks );
00192         // compute the min/max of the blocks bmin/bmax
00193         const _VectorUnion<T> smin = minmax.first;
00194         const _VectorUnion<T> smax = minmax.second;
00195         bmin = *std::min_element( smin.vec.elem, smin.vec.elem + 16 / sizeof( T ) );
00196         bmax = *std::max_element( smax.vec.elem, smax.vec.elem + 16 / sizeof( T ) );
00197     }
00198 
00199     // if there are some remaining elements
00200     if( data + blocks * 16 / sizeof( T ) < data + len ) {
00201         const T rmin = *std::min_element( data + blocks * 16 / sizeof( T ), data + len );
00202         const T rmax = *std::max_element( data + blocks * 16 / sizeof( T ), data + len );
00203         return std::pair<T, T>( std::min( bmin, rmin ), std::max( bmax, rmax ) );
00204     } else {
00205         return std::pair<T, T>( bmin, bmax );
00206     }
00207 }
00208 
00209 API_EXCLUDE_END
00210 
00212 // specialize calcMinMax for (u)int(8,16,32)_t /
00214 
00215 template<> std::pair< uint8_t,  uint8_t> calcMinMax< uint8_t, 1>( const  uint8_t *data, size_t len ) {return _getMinMax( data, len );}
00216 template<> std::pair<uint16_t, uint16_t> calcMinMax<uint16_t, 1>( const uint16_t *data, size_t len ) {return _getMinMax( data, len );}
00217 template<> std::pair<uint32_t, uint32_t> calcMinMax<uint32_t, 1>( const uint32_t *data, size_t len ) {return _getMinMax( data, len );}
00218 
00219 template<> std::pair< int8_t,  int8_t> calcMinMax< int8_t, 1>( const  int8_t *data, size_t len ) {return _getMinMax( data, len );}
00220 template<> std::pair<int16_t, int16_t> calcMinMax<int16_t, 1>( const int16_t *data, size_t len ) {return _getMinMax( data, len );}
00221 template<> std::pair<int32_t, int32_t> calcMinMax<int32_t, 1>( const int32_t *data, size_t len ) {return _getMinMax( data, len );}
00222 
00223 } //namepace _internal
00224 #else
00225 #warning Optimized min/max functions are not used because SSE2 is not enabled
00226 #endif
00227 
00228 }
00229 }
00230