Shootout/Spectral
< Shootout
A Shootout Entry for the spectral norm test
Contents |
[edit]
1 Timings
Debian Linux x86, n=2,500
||Entry || Time|| ||dons || 9.407|| ||old || 10.564||
[edit]
2 Current entry
Damn fast.
{-# OPTIONS -fvia-C -fexcess-precision #-} -- -- The Computer Language Benchmarks Game -- http://shootout.alioth.debian.org/ -- -- Modified by Ryan Trinkle: 1) change from divInt# to uncheckedIShiftRA# -- 2) changed -optc-O to -optc-O3 -- 3) added -optc-ffast-math -- Translation from Clean by Don Stewart -- -- Should be compiled with: -- -O -fglasgow-exts -fbang-patterns -- -optc-O3 -optc-march=pentium4 -optc-mfpmath=sse -optc-msse2 -optc-ffast-math -- -- Note: The following flags appear to INCREASE running time: -- -O2 -optc-funroll-loops import System import Foreign.Marshal.Array import Foreign import Text.Printf import Control.Monad import Data.ByteString.Internal import GHC.Base import GHC.Float import GHC.Int type Reals = Ptr Double main = do n <- getArgs >>= readIO . head u <- mallocArray n :: IO Reals forM_ [0..n-1] $ \i -> pokeElemOff u i 1 v <- mallocArray n :: IO Reals forM_ [0..n-1] $ \i -> pokeElemOff v i 0 powerMethod 10 n u v printf "%.9f\n" (eigenvalue n u v 0 0 0) ------------------------------------------------------------------------ eigenvalue :: Int -> Reals -> Reals -> Int -> Double -> Double -> Double eigenvalue !n !u !v !i !vBv !vv | i < n = eigenvalue n u v (i+1) (vBv + ui * vi) (vv + vi * vi) | otherwise = sqrt $! vBv / vv where ui = inlinePerformIO (peekElemOff u i) vi = inlinePerformIO (peekElemOff v i) ------------------------------------------------------------------------ powerMethod :: Int -> Int -> Reals -> Reals -> IO () powerMethod !i !n !u !v = allocaArray n $ \t -> replicateM_ i $ timesAtAv t n u v >> timesAtAv t n v u -- multiply vector v by matrix A and then by matrix A transposed timesAtAv :: Reals -> Int -> Reals -> Reals -> IO () timesAtAv !t !n !u !atau = do timesAv n u t timesAtv n t atau timesAv :: Int -> Reals -> Reals -> IO () timesAv !n !u !au = go 0 where go :: Int -> IO () go !i = when (i < n) $ do pokeElemOff au i (avsum i 0 0) go (i+1) avsum :: Int -> Int -> Double -> Double avsum !i !j !acc | j < n = avsum i (j+1) (acc + ((aij i j) * uj)) | otherwise = acc where uj = inlinePerformIO (peekElemOff u j) timesAtv :: Int -> Reals -> Reals -> IO () timesAtv !n !u !a = go 0 where go :: Int -> IO () go !i = when (i < n) $ do pokeElemOff a i (atvsum i 0 0) go (i+1) atvsum :: Int -> Int -> Double -> Double atvsum !i !j !acc | j < n = atvsum i (j+1) (acc + ((aij j i) * uj)) | otherwise = acc where uj = inlinePerformIO (peekElemOff u j) -- -- manually unbox the inner loop: -- aij i j = 1 / fromIntegral ((i+j) * (i+j+1) `div` 2 + i + 1) -- aij (I# i) (I# j) = D# ( case i +# j of n -> case n *# (n+#1#) of t -> case t `uncheckedIShiftRA#` 1# of u -> case u +# (i +# 1#) of r -> 1.0## /## (int2Double# r))
[edit]
3 Proposed entry
Uses Ptr Double, and carefully unboxes the aij inner computation (where 70% of the time is spent). Runs 2.3x slower than C on my p4.
{-# OPTIONS -fexcess-precision #-} -- -- The Computer Language Shootout -- http://shootout.alioth.debian.org/ -- -- Translation from Clean by Don Stewart -- -- Should be compiled with: -- -O -fglasgow-exts -fbang-patterns -- -optc-O -optc-march=pentium4 -optc-mfpmath=sse -optc-msse2 -- import System import Foreign.Marshal.Array import Foreign import Text.Printf import Control.Monad import Data.ByteString.Base import GHC.Base import GHC.Float import GHC.Int type Reals = Ptr Double main = do n <- getArgs >>= readIO . head u <- mallocArray n :: IO Reals forM_ [0..n-1] $ \i -> pokeElemOff u i 1 v <- mallocArray n :: IO Reals forM_ [0..n-1] $ \i -> pokeElemOff v i 0 powerMethod 10 n u v printf "%.9f\n" (eigenvalue n u v 0 0 0) ------------------------------------------------------------------------ eigenvalue :: Int -> Reals -> Reals -> Int -> Double -> Double -> Double eigenvalue !n !u !v !i !vBv !vv | i < n = eigenvalue n u v (i+1) (vBv + ui * vi) (vv + vi * vi) | otherwise = sqrt $! vBv / vv where ui = inlinePerformIO (peekElemOff u i) vi = inlinePerformIO (peekElemOff v i) ------------------------------------------------------------------------ powerMethod :: Int -> Int -> Reals -> Reals -> IO () powerMethod !i !n !u !v = allocaArray n $ \t -> replicateM_ i $ timesAtAv t n u v >> timesAtAv t n v u -- multiply vector v by matrix A and then by matrix A transposed timesAtAv :: Reals -> Int -> Reals -> Reals -> IO () timesAtAv !t !n !u !atau = do timesAv n u t timesAtv n t atau {-# INLINE timesAtAv #-} timesAv :: Int -> Reals -> Reals -> IO () timesAv !n !u !au = go 0 where go :: Int -> IO () go !i = when (i < n) $ do pokeElemOff au i (avsum i 0 0) go (i+1) avsum :: Int -> Int -> Double -> Double avsum !i !j !acc | j < n = avsum i (j+1) (acc + ((aij i j) * uj)) | otherwise = acc where uj = inlinePerformIO (peekElemOff u j) {-# INLINE timesAv #-} timesAtv :: Int -> Reals -> Reals -> IO () timesAtv !n !u !a = go 0 where go :: Int -> IO () go !i = when (i < n) $ do pokeElemOff a i (atvsum i 0 0) go (i+1) atvsum :: Int -> Int -> Double -> Double atvsum !i !j !acc | j < n = atvsum i (j+1) (acc + ((aij j i) * uj)) | otherwise = acc where uj = inlinePerformIO (peekElemOff u j) {-# INLINE timesAtv #-} -- -- manually unbox the inner loop: -- aij i j = 1 / fromIntegral ((i+j) * (i+j+1) `div` 2 + i + 1) -- aij (I# i) (I# j) = D# ( case i +# j of n -> case n *# (n+#1#) of t -> case divInt# t 2# of u -> case u +# (i +# 1#) of r -> 1.0## /## (int2Double# r))
[edit]
4 Current entry
Better gcc flags. Bang patterns. Submitted
Didn't do so well in practice though :/
{-# OPTIONS -O -fglasgow-exts -fbang-patterns -funbox-strict-fields -fexcess-precision -optc-O2 -optc-march=pentium4 -optc-mfpmath=sse -optc-msse2 #-} -- -- The Great Computer Language Shootout -- http:--shootout.alioth.debian.org/ -- -- Contributed by Don Stewart -- import Monad import System import Text.Printf import Data.Array.IO import Data.Array.Base main = getArgs >>= approximate . read . head >>= printf "%.9f\n" approximate n = do u <- newArray (0,n-1) 1 :: IO (IOUArray Int Double) v <- newArray (0,n-1) 0 :: IO (IOUArray Int Double) sequence_ $ replicate 10 $ multiplyAtAv n u v >> multiplyAtAv n v u let loop !vbv !vv !i | i >= n = return (vbv,vv) | otherwise = do ui <- unsafeRead u i vi <- unsafeRead v i loop (vbv + ui * vi) (vv + vi * vi) (i+1) (vbv,vv) <- loop 0 0 0 return $! sqrt (vbv/vv) -- return element i,j of infinite matrix A a i j = 1 / fromIntegral (x*(x+1) `div` 2 + i + 1) where x = i+j -- multiply vector v by matrix A */ multiplyAv !n !v !av = loop 0 where loop i = when (i < n) $ do avi <- loop' i 0 0 unsafeWrite av i avi >> loop (i+1) loop' !i !j !av | j >= n = return av | otherwise = do vj <- unsafeRead v j loop' i (j+1) (av + a i j * vj) -- multiply vector v by matrix A transposed multiplyAtv !n !v !atv = loop 0 where loop i = when (i < n) $ do atvi <- loop' i 0 0 unsafeWrite atv i atvi >> loop (i+1) loop' !i !j !atvi | j >= n = return atvi | otherwise = do vj <- unsafeRead v j loop' i (j+1) (atvi + a j i * vj) -- multiply vector v by matrix A and then by matrix A transposed */ multiplyAtAv !n !v !atav = do u <- newArray (0,n-1) 0 :: IO (IOUArray Int Double) multiplyAv n v u >> multiplyAtv n u atav
[edit]
5 Old entry
GHC unboxed this better. Careful attention payed to the unboxing. -O2 -optc-O -optc-ffast-math -fexcess-precision
{-# OPTIONS -optc-O #-} -- -- The Great Computer Language Shootout -- http:--shootout.alioth.debian.org/ -- -- Contributed by Don Stewart -- -- gcc miscompiles this program with -O3 -- import Monad import System import Numeric import Data.Array.IO import Data.Array.Base main = getArgs >>= approximate . read . head >>= putStrLn . \s -> showFFloat (Just 9) s [] approximate n = do u <- newArray (0,n-1) 1 :: IO (IOUArray Int Double) v <- newArray_ (0,n-1) :: IO (IOUArray Int Double) sequence_ $ replicate 10 $ multiplyAtAv n u v >> multiplyAtAv n v u loop 0 0 0 n u v loop vbv vv i n u v | vbv `seq` vv `seq` i `seq` n `seq` u `seq` v `seq` False = undefined loop vbv vv i n u v | i >= n = return $! sqrt (vbv/vv) | otherwise = do ui <- unsafeRead u i vi <- unsafeRead v i loop (vbv + ui * vi) (vv + vi * vi) (i+1) n u v -- return element i,j of infinite matrix A a i j | i `seq` j `seq` False = undefined a i j = 1 / fromIntegral (x*(x+1) `div` 2 + i + 1) where x = i+j -- multiply vector v by matrix A */ multiplyAv n v av | n `seq` v `seq` av `seq` False = undefined multiplyAv n v av = loop 0 where loop i = when (i < n) $ loop' i 0 0 >>= unsafeWrite av i >> loop (i+1) loop' i j av | i `seq` j `seq` av `seq` False = undefined loop' i j av | j >= n = return av | otherwise = do vj <- v `unsafeRead` j loop' i (j+1) (av + a i j * vj) -- multiply vector v by matrix A transposed multiplyAtv n v atv | n `seq` v `seq` atv `seq` False = undefined multiplyAtv n v atv = loop 0 where loop i = when (i < n) $ loop' i 0 0 >>= unsafeWrite atv i >> loop (i+1) loop' i j atvi | j `seq` atvi `seq` False = undefined loop' i j atvi | j >= n = return atvi | otherwise = do vj <- v `unsafeRead` j loop' i (j+1) (atvi + a j i * vj) -- multiply vector v by matrix A and then by matrix A transposed */ multiplyAtAv n v atav | n `seq` v `seq` atav `seq` False = undefined multiplyAtAv n v atav = do u <- newArray_ (0,n-1) :: IO (IOUArray Int Double) multiplyAv n v u >> multiplyAtv n u atav
[edit]
6 Current entry
-- The Great Computer Language Shootout -- http:--shootout.alioth.debian.org/ -- -- Original C contributed by Sebastien Loisel -- Conversion to C++ by Jon Harrop -- Conversion to Haskell by Einar Karttunen import Control.Monad.ST import Data.Array.Base import Data.Array.ST import Numeric import System eval_A :: Int -> Int -> Double eval_A i j = 1 / fromIntegral ((i+j)*(i+j+1) `div` 2 + i + 1) plusAt :: STUArray s Int Double -> Int -> Double -> ST s () plusAt a i v = do o <- unsafeRead a i unsafeWrite a i (v+o) eval_A_Times_u :: STUArray s Int Double -> STUArray s Int Double -> ST s () eval_A_Times_u u au = outer (snd $ bounds u) where outer 0 = unsafeWrite au 0 0 >> inner 0 (snd $ bounds u) outer i = unsafeWrite au i 0 >> inner i (snd $ bounds u) >> outer (i-1) inner i 0 = unsafeRead u 0 >>= \uj -> plusAt au i (eval_A i 0 * uj) inner i j = unsafeRead u j >>= \uj -> plusAt au i (eval_A i j * uj) >> inner i (j-1) eval_At_Times_u :: STUArray s Int Double -> STUArray s Int Double -> ST s () eval_At_Times_u u au = outer (snd $ bounds u) where outer 0 = unsafeWrite au 0 0 >> inner 0 (snd $ bounds u) outer i = unsafeWrite au i 0 >> inner i (snd $ bounds u) >> outer (i-1) inner i 0 = unsafeRead u 0 >>= \uj -> plusAt au i (eval_A 0 i * uj) inner i j = unsafeRead u j >>= \uj -> plusAt au i (eval_A j i * uj) >> inner i (j-1) eval_AtA_Times_u u v = do w <- newArray (bounds u) 0 eval_A_Times_u u w >> eval_At_Times_u w v main = do n <- getArgs >>= return.read.head let (vBv,vv) = runST (do u <- newArray (0,n-1) 1 v <- newArray (0,n-1) 0 sequence_ $ replicate 10 (eval_AtA_Times_u u v >> eval_AtA_Times_u v u) vLoop u v n (0, 0)) putStrLn $ showFFloat (Just 9) (sqrt (vBv/vv)) "" vLoop :: STUArray s Int Double -> STUArray s Int Double -> Int -> (Double,Double) -> ST s (Double,Double) vLoop u v 0 a = return a vLoop u v (i+1) (vBv,vv) = vLoop u v i =<< op where op = do ui <- unsafeRead u i vi <- unsafeRead v i return (vBv+(ui*vi),vv+(vi*vi))
