ISIS Core Library 0.7.2 (api 3.0.0)
|
00001 #include "valuearray.hpp" 00002 00003 namespace isis 00004 { 00005 namespace data 00006 { 00008 // specialisation for complex - there shall be no scaling - and we cannot compute minmax 00009 template<> scaling_pair ValueArray<std::complex<float> >::getScalingTo( unsigned short /*typeID*/, autoscaleOption /*scaleopt*/ )const 00010 { 00011 return scaling_pair( util::Value<float>( 1 ), util::Value<float>( 0 ) ); 00012 } 00013 template<> scaling_pair ValueArray<std::complex<double> >::getScalingTo( unsigned short /*typeID*/, autoscaleOption /*scaleopt*/ )const 00014 { 00015 return scaling_pair( util::Value<double>( 1 ), util::Value<double>( 0 ) ); 00016 } 00017 00018 #ifdef __SSE2__ 00019 00020 #include <emmintrin.h> 00021 namespace _internal 00022 { 00023 00024 API_EXCLUDE_BEGIN 00025 00027 // some voodoo to get the vector types into the templates / 00029 template<typename T> struct _VectorUnion { 00030 union {__m128i reg; T elem[16 / sizeof( T )];} vec; 00031 _VectorUnion( const T *el ) {std::copy( el, el + 16 / sizeof( T ), vec.elem );} 00032 _VectorUnion( __m128i _reg ) {vec.reg = _reg;} 00033 _VectorUnion() {} 00034 }; 00035 template<typename T> struct _TypeVector; 00036 00037 // mapping for signed types 00038 #define DEF_VECTOR_SI(TYPE,KEY) \ 00039 template<> struct _TypeVector<TYPE>{ \ 00040 static __m128i gt(__m128i a,__m128i b){return _mm_cmpgt_epi ## KEY (a, b);} \ 00041 static __m128i lt(__m128i a,__m128i b){return _mm_cmplt_epi ## KEY (a, b);} \ 00042 } 00043 DEF_VECTOR_SI( int8_t, 8 ); 00044 DEF_VECTOR_SI( int16_t, 16 ); 00045 DEF_VECTOR_SI( int32_t, 32 ); 00046 00047 // value offset to compare unsigned int as signed (there is no compare for unsigned in SSE2) 00048 template<typename T> __m128i _getAddV() 00049 { 00050 _VectorUnion<T> ret; 00051 const T filler = 1 << ( sizeof( T ) * 8 - 1 ); 00052 std::fill( ret.vec.elem, ret.vec.elem + 16 / sizeof( T ), filler ); 00053 return ret.vec.reg; 00054 } 00055 00056 // mapping for unsigned types 00057 #define DEF_VECTOR_UI(TYPE,KEY) \ 00058 template<> struct _TypeVector<TYPE>{ \ 00059 static __m128i addv; \ 00060 static inline __m128i gt(__m128i a,__m128i b){return _mm_cmpgt_epi ## KEY (_mm_add_epi ## KEY(a,addv), _mm_add_epi ## KEY(b,addv));} \ 00061 static inline __m128i lt(__m128i a,__m128i b){return _mm_cmplt_epi ## KEY (_mm_add_epi ## KEY(a,addv), _mm_add_epi ## KEY(b,addv));} \ 00062 };\ 00063 __m128i _TypeVector<TYPE>::addv=_getAddV<TYPE>() 00064 00065 DEF_VECTOR_UI( uint8_t, 8 ); 00066 DEF_VECTOR_UI( uint16_t, 16 ); 00067 DEF_VECTOR_UI( uint32_t, 32 ); 00068 00069 00070 00072 // optimized min/max function for integers / 00074 00075 // generic fallback using cmpgt and some bitmask voodoo 00076 template<typename T> std::pair<__m128i, __m128i> _getMinMaxBlockLoop( const __m128i *data, size_t blocks ) 00077 { 00078 std::pair<__m128i, __m128i> ret( _mm_loadu_si128( data ), _mm_loadu_si128( data ) ); 00079 LOG( Runtime, verbose_info ) << "using optimized min/max computation for " << util::Value<T>::staticName() << " (masked mode)"; 00080 static const __m128i one = _mm_set_epi16( -1, -1, -1, -1, -1, -1, -1, -1 ); 00081 00082 while ( --blocks ) { 00083 const __m128i at = _mm_loadu_si128( ++data ); 00084 00085 const __m128i less_mask = _TypeVector<T>::lt( at, ret.first ); 00086 const __m128i greater_mask = _TypeVector<T>::gt( at, ret.second ); 00087 00088 ret.first = 00089 ( ret.first & ( less_mask ^ one ) ) //remove bigger values from current min 00090 | ( at & less_mask ); //put in the lesser values from at 00091 ret.second = 00092 ( ret.second & ( greater_mask ^ one ) ) //remove lesser values from current max 00093 | ( at & greater_mask ); //put in the bigger values from at 00094 } 00095 00096 return ret; 00097 } 00098 00099 // specialiced versions using processor opcodes for min/max 00100 template<> std::pair<__m128i, __m128i> _getMinMaxBlockLoop<uint8_t>( const __m128i *data, size_t blocks ) //PMAXUB 00101 { 00102 std::pair<__m128i, __m128i> ret( _mm_loadu_si128( data ), _mm_loadu_si128( data ) ); 00103 LOG( Runtime, verbose_info ) << "using optimized min/max computation for " << util::Value<uint8_t>::staticName() << " (direct mode)"; 00104 00105 while ( --blocks ) { 00106 const __m128i at = _mm_loadu_si128( ++data ); 00107 ret.first = _mm_min_epu8( ret.first, at ); 00108 ret.second = _mm_max_epu8( ret.second, at ); 00109 } 00110 00111 return ret; 00112 } 00113 template<> std::pair<__m128i, __m128i> _getMinMaxBlockLoop<int16_t>( const __m128i *data, size_t blocks ) //PMAXSW 00114 { 00115 std::pair<__m128i, __m128i> ret( _mm_loadu_si128( data ), _mm_loadu_si128( data ) ); 00116 LOG( Runtime, verbose_info ) << "using optimized min/max computation for " << util::Value<int16_t>::staticName() << " (direct mode)"; 00117 00118 while ( --blocks ) { 00119 const __m128i at = _mm_loadu_si128( ++data ); 00120 ret.first = _mm_min_epi16( ret.first, at ); 00121 ret.second = _mm_max_epi16( ret.second, at ); 00122 } 00123 00124 return ret; 00125 } 00126 00127 #ifdef __SSE4_1__ 00128 #include <smmintrin.h> 00129 template<> std::pair<__m128i, __m128i> _getMinMaxBlockLoop<int8_t>( const __m128i *data, size_t blocks ) //PMAXSB 00130 { 00131 std::pair<__m128i, __m128i> ret( _mm_loadu_si128( data ), _mm_loadu_si128( data ) ); 00132 LOG( Runtime, verbose_info ) << "using optimized min/max computation for " << util::Value<int8_t>::staticName() << " (direct mode)"; 00133 00134 while ( --blocks ) { 00135 const __m128i at = _mm_loadu_si128( ++data ); 00136 ret.first = _mm_min_epi8( ret.first, at ); 00137 ret.second = _mm_max_epi8( ret.second, at ); 00138 } 00139 00140 return ret; 00141 } 00142 template<> std::pair<__m128i, __m128i> _getMinMaxBlockLoop<uint16_t>( const __m128i *data, size_t blocks ) //PMAXUW 00143 { 00144 std::pair<__m128i, __m128i> ret( _mm_loadu_si128( data ), _mm_loadu_si128( data ) ); 00145 LOG( Runtime, verbose_info ) << "using optimized min/max computation for " << util::Value<uint16_t>::staticName() << " (direct mode)"; 00146 00147 while ( --blocks ) { 00148 const __m128i at = _mm_loadu_si128( ++data ); 00149 ret.first = _mm_min_epu16( ret.first, at ); 00150 ret.second = _mm_max_epu16( ret.second, at ); 00151 } 00152 00153 return ret; 00154 } 00155 template<> std::pair<__m128i, __m128i> _getMinMaxBlockLoop<int32_t>( const __m128i *data, size_t blocks ) //PMAXSD 00156 { 00157 std::pair<__m128i, __m128i> ret( _mm_loadu_si128( data ), _mm_loadu_si128( data ) ); 00158 LOG( Runtime, verbose_info ) << "using optimized min/max computation for " << util::Value<int32_t>::staticName() << " (direct mode)"; 00159 00160 while ( --blocks ) { 00161 const __m128i at = _mm_loadu_si128( ++data ); 00162 ret.first = _mm_min_epi32( ret.first, at ); 00163 ret.second = _mm_max_epi32( ret.second, at ); 00164 } 00165 00166 return ret; 00167 } 00168 template<> std::pair<__m128i, __m128i> _getMinMaxBlockLoop<uint32_t>( const __m128i *data, size_t blocks ) //PMAXSD 00169 { 00170 std::pair<__m128i, __m128i> ret( _mm_loadu_si128( data ), _mm_loadu_si128( data ) ); 00171 LOG( Runtime, verbose_info ) << "using optimized min/max computation for " << util::Value<uint32_t>::staticName() << " (direct mode)"; 00172 00173 while ( --blocks ) { 00174 const __m128i at = _mm_loadu_si128( ++data ); 00175 ret.first = _mm_min_epu32( ret.first, at ); 00176 ret.second = _mm_max_epu32( ret.second, at ); 00177 } 00178 00179 return ret; 00180 } 00181 #endif 00182 00183 template<typename T> std::pair<T, T> _getMinMax( const T *data, size_t len ) 00184 { 00185 size_t blocks = len / ( 16 / sizeof( T ) ); 00186 00187 T bmin = std::numeric_limits<T>::max(); 00188 T bmax = std::numeric_limits<T>::min(); 00189 00190 if( blocks ) { // if there are 16byte-blocks of values 00191 std::pair<__m128i, __m128i> minmax = _getMinMaxBlockLoop<T>( reinterpret_cast<const __m128i *>( data ), blocks ); 00192 // compute the min/max of the blocks bmin/bmax 00193 const _VectorUnion<T> smin = minmax.first; 00194 const _VectorUnion<T> smax = minmax.second; 00195 bmin = *std::min_element( smin.vec.elem, smin.vec.elem + 16 / sizeof( T ) ); 00196 bmax = *std::max_element( smax.vec.elem, smax.vec.elem + 16 / sizeof( T ) ); 00197 } 00198 00199 // if there are some remaining elements 00200 if( data + blocks * 16 / sizeof( T ) < data + len ) { 00201 const T rmin = *std::min_element( data + blocks * 16 / sizeof( T ), data + len ); 00202 const T rmax = *std::max_element( data + blocks * 16 / sizeof( T ), data + len ); 00203 return std::pair<T, T>( std::min( bmin, rmin ), std::max( bmax, rmax ) ); 00204 } else { 00205 return std::pair<T, T>( bmin, bmax ); 00206 } 00207 } 00208 00209 API_EXCLUDE_END 00210 00212 // specialize calcMinMax for (u)int(8,16,32)_t / 00214 00215 template<> std::pair< uint8_t, uint8_t> calcMinMax< uint8_t, 1>( const uint8_t *data, size_t len ) {return _getMinMax( data, len );} 00216 template<> std::pair<uint16_t, uint16_t> calcMinMax<uint16_t, 1>( const uint16_t *data, size_t len ) {return _getMinMax( data, len );} 00217 template<> std::pair<uint32_t, uint32_t> calcMinMax<uint32_t, 1>( const uint32_t *data, size_t len ) {return _getMinMax( data, len );} 00218 00219 template<> std::pair< int8_t, int8_t> calcMinMax< int8_t, 1>( const int8_t *data, size_t len ) {return _getMinMax( data, len );} 00220 template<> std::pair<int16_t, int16_t> calcMinMax<int16_t, 1>( const int16_t *data, size_t len ) {return _getMinMax( data, len );} 00221 template<> std::pair<int32_t, int32_t> calcMinMax<int32_t, 1>( const int32_t *data, size_t len ) {return _getMinMax( data, len );} 00222 00223 } //namepace _internal 00224 #else 00225 #warning Optimized min/max functions are not used because SSE2 is not enabled 00226 #endif 00227 00228 } 00229 } 00230