单线程单进程
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
using BenchmarkTools
# Report how many threads this Julia session was started with.
println("Number of threads: $(Threads.nthreads())")
"""
    evaluatefunctions(N)

Benchmark workload: run 10 000 rounds of element-wise `sin`/`asin`,
`cos`/`acos` and `tan`/`atan` over `N` points spanning `[-1500, 1500]`.

Every broadcast allocates a fresh length-`N` array and the results are
discarded, so only run time and allocation count are of interest.
Returns `nothing`.
"""
function evaluatefunctions(N)
    # `range(start, stop = ..., length = ...)` replaces the pre-1.0 `linspace`.
    # `x` starts as a lazy range and becomes a `Vector` after the first `asin.`.
    x = range(-1500.0, stop = 1500.0, length = N)
    M = 10000
    for i in 1:M
        y = sin.(x)
        x = asin.(y)
        y = cos.(x)
        x = acos.(y)
        y = tan.(x)
        x = atan.(y)
    end
    return nothing
end
# Time the workload on 2000 sample points.
@btime evaluatefunctions(2000)
1
2
3
[ misaraty @master test ] $ julia test . jl
Number of threads : 1
1.099 s ( 60000 allocations : 922.85 MiB )
1
2
3
4
5
6
7
8
using BenchmarkTools
# Report how many threads this Julia session was started with.
println("Number of threads: $(Threads.nthreads())")
N = 200
A = ones(N, N)
# Benchmark: set one diagonal entry, then take the square root of the whole
# N×N matrix (`sqrt` of a matrix, not element-wise); `B` is discarded.
@btime for i = 1:N
    A[i, i] = 6
    B = sqrt(A)
end
1
2
3
[ misaraty @master test ] $ julia test . jl
Number of threads : 1
2.349 s ( 4401 allocations : 320.14 MiB )
多线程
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
using BenchmarkTools
# Report how many threads this Julia session was started with (`julia -t N`).
println("Number of threads: $(Threads.nthreads())")
"""
    evaluatefunctions(N)

Multi-threaded benchmark workload: run 10 000 independent rounds of
element-wise `sin`/`asin`, `cos`/`acos` and `tan`/`atan` over `N` points
spanning `[-1500, 1500]`, spreading the rounds across the available threads.

The results are discarded; only run time and allocation count are of
interest. Returns `nothing`.
"""
function evaluatefunctions(N)
    # `range(start, stop = ..., length = ...)` replaces the pre-1.0 `linspace`.
    grid = range(-1500.0, stop = 1500.0, length = N)
    M = 10000
    Threads.@threads for i in 1:M
        # `x` and `y` are first assigned inside the loop body, so they are local
        # to each iteration. Assigning to a variable of the enclosing function
        # here would make every thread read and write the same boxed binding,
        # which is a data race; `grid` is only read.
        y = sin.(grid)
        x = asin.(y)
        y = cos.(x)
        x = acos.(y)
        y = tan.(x)
        x = atan.(y)
    end
    return nothing
end
# Time the threaded workload on 2000 sample points.
@btime evaluatefunctions(2000)
1
2
3
4
5
6
7
8
9
[ misaraty @master test ] $ julia - t 1 test . jl
Number of threads : 1
1.141 s ( 240009 allocations : 926.51 MiB )
[ misaraty @master test ] $ julia - t 4 test . jl
Number of threads : 4
310.887 ms ( 240027 allocations : 926.52 MiB )
[ misaraty @master test ] $ julia - t 10 test . jl
Number of threads : 10
129.303 ms ( 240062 allocations : 926.52 MiB )
1
2
3
4
5
6
7
8
using BenchmarkTools
# Report how many threads this Julia session was started with (`julia -t N`).
println("Number of threads: $(Threads.nthreads())")
N = 200
A = ones(N, N)
# Benchmark: set one diagonal entry, then take the square root of the whole
# N×N matrix; `B` is discarded.
# NOTE(review): the iterations are not independent — each one writes a diagonal
# entry of the shared `A` while other threads may be reading `A` inside
# `sqrt(A)`. Treat this purely as a timing workload, not as a pattern to copy.
@btime Threads.@threads for i = 1:N
    A[i, i] = 6
    B = sqrt(A)
end
1
2
3
4
5
6
7
8
9
[ misaraty @master test ] $ julia - t 1 test . jl
Number of threads : 1
2.145 s ( 4207 allocations : 320.13 MiB )
[ misaraty @master test ] $ julia - t 4 test . jl
Number of threads : 4
1.058 s ( 4223 allocations : 320.13 MiB )
[ misaraty @master test ] $ julia - t 10 test . jl
Number of threads : 10
496.962 ms ( 4252 allocations : 320.13 MiB )
多进程
Distributed
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
using BenchmarkTools
using Distributed
# Report the number of Julia processes (master plus workers started with `-p`).
println("Number of process: $(length(procs()))")
"""
    evaluatefunctions(N)

Multi-process benchmark workload: run 10 000 independent rounds of
element-wise `sin`/`asin`, `cos`/`acos` and `tan`/`atan` over `N` points
spanning `[-1500, 1500]`, distributing the rounds with `@distributed` and
waiting for completion with `@sync`.

The computed arrays are discarded. The function's value is that of the
`@sync @distributed for` expression; callers here ignore it.
"""
function evaluatefunctions(N)
    # `range(start, stop = ..., length = ...)` replaces the pre-1.0 `linspace`.
    grid = range(-1500.0, stop = 1500.0, length = N)
    M = 10000
    @sync @distributed for i in 1:M
        # `x` and `y` are first assigned inside the loop body, so they are local
        # to each iteration; the loop closure only captures the read-only
        # `grid` instead of a reassigned (boxed) outer variable.
        y = sin.(grid)
        x = asin.(y)
        y = cos.(x)
        x = acos.(y)
        y = tan.(x)
        x = atan.(y)
    end
end
# Time the distributed workload on 2000 sample points.
@btime evaluatefunctions(2000)
1
2
3
4
5
6
7
8
9
[ misaraty @master test ] $ julia test . jl
Number of process : 1
1.059 s ( 240060 allocations : 926.52 MiB )
[ misaraty @master test ] $ julia - p 3 test . jl
Number of process : 4
374.000 ms ( 396 allocations : 14.55 KiB )
[ misaraty @master test ] $ julia - p 9 test . jl
Number of process : 10
139.397 ms ( 1265 allocations : 45.80 KiB )
1
2
3
4
5
6
7
8
9
using BenchmarkTools
using Distributed
# Report the number of Julia processes (master plus workers started with `-p`).
println("Number of process: $(length(procs()))")
N = 200
A = ones(N, N)
# Benchmark: set one diagonal entry, then take the square root of the whole
# N×N matrix; `B` is discarded. `@sync` waits for all distributed chunks.
# NOTE(review): `A` is a global of the master process; presumably each worker
# operates on its own copy, so the master's `A` is not updated — confirm.
@btime @sync @distributed for i = 1:N
    A[i, i] = 6
    B = sqrt(A)
end
1
2
3
4
5
6
7
8
9
[ misaraty @master test ] $ julia test . jl
Number of process : 1
2.160 s ( 4258 allocations : 320.13 MiB )
[ misaraty @master test ] $ julia - p 3 test . jl
Number of process : 4
469.861 ms ( 333 allocations : 13.06 KiB )
[ misaraty @master test ] $ julia - p 9 test . jl
Number of process : 10
134.422 ms ( 1074 allocations : 41.38 KiB )
MPI.jl
使用 MPI.jl 前，需自行安装 OpenMPI 4.1.2。OpenMPI 1.8.8 + Intel 的安装可参考 Openmpi。
1
2
3
4
5
# Unpack the Open MPI 4.1.2 sources and build from inside the source tree.
tar -zxvf openmpi-4.1.2.tar.gz
cd openmpi-4.1.2
# No space is allowed after `--prefix=`; the value is the install directory.
./configure --prefix=/home/misaraty/soft/openmpi --enable-static
# Alternative: name the GNU compilers explicitly.
#./configure --prefix=/home/misaraty/soft/openmpi --enable-static CC=gcc CXX=g++ F77=gfortran FC=gfortran F90=gfortran
make
make install
修改 .bashrc 或 .bash_profile，加入以下内容：
1
2
3
# openmpi
# Shell assignments take no spaces around `=`.
export PATH="$PATH:/home/misaraty/soft/openmpi/bin"
# Put the Open MPI libraries first while keeping any existing search path.
export LD_LIBRARY_PATH="/home/misaraty/soft/openmpi/lib${LD_LIBRARY_PATH:+:$LD_LIBRARY_PATH}"
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
using BenchmarkTools
using MPI
# Initialise the MPI environment for this process.
MPI.Init()
# println("Number of process: $(MPI.Comm_size(MPI.COMM_WORLD))")
"""
    evaluatefunctions(M)

Run the calling rank's share of the 10 000-round trigonometric workload
(element-wise `sin`/`asin`, `cos`/`acos`, `tan`/`atan`) over `M` points
spanning `[-1500, 1500]`.

The rounds `1:N` are divided between the ranks of `MPI.COMM_WORLD` by
`start_and_end`; each rank executes only its own `ista:iend` slice. Nothing is
communicated between ranks and the results are discarded. Returns `nothing`.
"""
function evaluatefunctions(M)
    comm = MPI.COMM_WORLD
    N = 10000
    # Only the index bounds are needed here; the per-rank count is ignored.
    ista, iend, _ = start_and_end(N, comm)
    # `range(start, stop = ..., length = ...)` replaces the pre-1.0 `linspace`.
    x = range(-1500.0, stop = 1500.0, length = M)
    for i in ista:iend
        y = sin.(x)
        x = asin.(y)
        y = cos.(x)
        x = acos.(y)
        y = tan.(x)
        x = atan.(y)
    end
    return nothing
end
"""
    start_and_end(N, comm)

Return `(ista, iend, nbun)`: the first index, the last index and the number
of iterations of `1:N` assigned to the calling rank of communicator `comm`.

Iterations are handed out in contiguous blocks in rank order. When `N` is not
a multiple of the communicator size, the first `N % nprocs` ranks receive one
extra iteration, so every index in `1:N` is covered exactly once. A rank with
no work gets an empty range (`iend == ista - 1`, `nbun == 0`).
"""
function start_and_end(N, comm)
    nprocs = MPI.Comm_size(comm)
    myrank = MPI.Comm_rank(comm)
    base, extra = divrem(N, nprocs)
    # Ranks below `extra` take one additional iteration each.
    nbun = base + (myrank < extra ? 1 : 0)
    # Skip the blocks of all lower ranks, including their extra iterations.
    ista = myrank * base + min(myrank, extra) + 1
    iend = ista + nbun - 1
    return ista, iend, nbun
end
# Every rank times its own share of the workload and prints its own result.
@btime evaluatefunctions(2000)
MPI.Finalize()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
[ misaraty @master test ] $ mpiexecjl - n 1 julia test . jl
1.059 s ( 60000 allocations : 922.85 MiB )
[ misaraty @master test ] $ mpiexecjl - n 4 julia test . jl
262.151 ms ( 15000 allocations : 230.71 MiB )
266.585 ms ( 15000 allocations : 230.71 MiB )
276.770 ms ( 15000 allocations : 230.71 MiB )
272.871 ms ( 15000 allocations : 230.71 MiB )
[ misaraty @master test ] $ mpiexecjl - n 10 julia test . jl
114.083 ms ( 6000 allocations : 92.29 MiB )
114.008 ms ( 6000 allocations : 92.29 MiB )
113.992 ms ( 113.953 ms ( 6000 allocations : 92.29 MiB )
6000 allocations : 92.29 MiB )
113.850 ms ( 6000 allocations : 92.29 MiB )
114.623 ms ( 6000 allocations : 92.29 MiB )
142.945 ms ( 6000 allocations : 92.29 MiB )
114.124 ms ( 6000 allocations : 92.29 MiB )
115.842 ms ( 6000 allocations : 92.29 MiB )
114.577 ms ( 6000 allocations : 92.29 MiB )
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
using BenchmarkTools
using MPI
# Initialise the MPI environment for this process.
MPI.Init()
# println("Number of process: $(MPI.Comm_size(MPI.COMM_WORLD))")
"""
    test()

Run the calling rank's share of the matrix-square-root workload.

The 200 rounds are divided between the ranks of `MPI.COMM_WORLD` by
`start_and_end`. In each of its rounds the rank sets one diagonal entry of its
own 200×200 matrix of ones to 6 and takes the square root of the whole matrix
(`sqrt` of a matrix, not element-wise). Nothing is communicated between ranks
and the results are discarded. Returns `nothing`.
"""
function test()
    comm = MPI.COMM_WORLD
    N = 200
    # Only the index bounds are needed here; the per-rank count is ignored.
    ista, iend, _ = start_and_end(N, comm)
    A = ones(N, N)
    for i = ista:iend
        A[i, i] = 6
        B = sqrt(A)
    end
    return nothing
end
"""
    start_and_end(N, comm)

Return `(ista, iend, nbun)`: the first index, the last index and the number
of iterations of `1:N` assigned to the calling rank of communicator `comm`.

Iterations are handed out in contiguous blocks in rank order. When `N` is not
a multiple of the communicator size, the first `N % nprocs` ranks receive one
extra iteration, so every index in `1:N` is covered exactly once. A rank with
no work gets an empty range (`iend == ista - 1`, `nbun == 0`).
"""
function start_and_end(N, comm)
    nprocs = MPI.Comm_size(comm)
    myrank = MPI.Comm_rank(comm)
    base, extra = divrem(N, nprocs)
    # Ranks below `extra` take one additional iteration each.
    nbun = base + (myrank < extra ? 1 : 0)
    # Skip the blocks of all lower ranks, including their extra iterations.
    ista = myrank * base + min(myrank, extra) + 1
    iend = ista + nbun - 1
    return ista, iend, nbun
end
# Every rank times its own share of the workload and prints its own result.
@btime test()
MPI.Finalize()
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
[ misaraty @master test ] $ mpiexecjl - n 1 julia test . jl
1.756 s ( 4202 allocations : 320.43 MiB )
[ misaraty @master test ] $ mpiexecjl - n 4 julia test . jl
352.608 ms ( 1052 allocations : 80.34 MiB )
408.630 ms ( 1052 allocations : 80.34 MiB )
572.909 ms ( 1052 allocations : 80.34 MiB )
576.099 ms ( 1052 allocations : 80.34 MiB )
[ misaraty @master test ] $ mpiexecjl - n 10 julia test . jl
181.883 ms ( 197.525 ms ( 422 allocations : 32.32 MiB )
172.983 ms ( 422 allocations : 32.32 MiB )
422 allocations : 32.32 MiB )
163.729 ms ( 422 allocations : 32.32 MiB )
309.789 ms ( 422 allocations : 32.32 MiB )
223.014 ms ( 422 allocations : 32.32 MiB )
225.081 ms ( 422 allocations : 32.32 MiB )
138.454 ms ( 422 allocations : 32.32 MiB )
135.505 ms ( 422 allocations : 32.32 MiB )
122.916 ms ( 422 allocations : 32.32 MiB )
文档
总结
Julia并行测试