UYBHM Yaz Çalıştayı, 15 – 26 Haziran 2009. Parallel Programming Application:
Matrix Multiplication.
UYBHM Yaz Çalıştayı 15 – 26 Haziran 2009
Parallel Programming Application: Matrix Multiplication (
[email protected])
Outline
• Matrix Multiplication (BLAS3 Operation)
• Cannon Algorithm
• Fox Algorithm
!
!
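Matrix Multiplication (BLAS3)
A and B n×n matrices ⇒ C = A×B

    void Serial_mat_mult(mt_A,mt_B,mt_C,int n) {
      int i,j,k;
      for (i=0;i<n;i++){
        for (j=0;j<n;j++){
          /* The rest of the listing was lost in text extraction; the
             standard triple loop is reconstructed here. */
          mt_C[i][j] = 0;
          for (k=0;k<n;k++)
            mt_C[i][j] += mt_A[i][k]*mt_B[k][j];
        }
      }
    }

This algorithm has a sequential time complexity of $O(n^3)$.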
Serial Matrix Multiplication
• During the first iteration of loop variable $i$, the first row of matrix $A$ and all the columns of matrix $B$ are used to compute the elements of the first row of the result matrix $C$.
!
Partitioning Matrices: Block Striping
• Uniform block-striped partitioning of a 16 × 16 matrix on 4 processors
!
Partitioning Matrices: Checkerboard
!
!
Parallel Matrix Multiplication – Partitioning into Submatrices
• Suppose the matrix is divided into $s^2$ submatrices. Each submatrix has $n/s \times n/s$ elements. Using the notation $A_{p,q}$ as the submatrix in submatrix row $p$ and submatrix column $q$:

    for (p = 0; p < s; p++)
      for (q = 0; q < s; q++) {
        Cp,q = 0;                        /* clear elements of submatrix */
        for (r = 0; r < m; r++)          /* submatrix multiplication and */
          Cp,q = Cp,q + Ap,r * Br,q;     /* add to accumulating submatrix */
      }
" )G)$9!C"$"7 ''',$'"'+,::(-%+"#%,-'#%:5'+,:*)5&%#A',B'F5"$97) ,$'F5$%7
!
!
Fox Algorithm
• Both $n \times n$ matrices $A$ and $B$ are partitioned among $p$ processors so that each processor initially stores $n^2/p$ elements of each matrix.
• The algorithm uses one-to-all broadcasts of the blocks of matrix $A$ in processor rows, and single-step circular upward shifts of the blocks of matrix $B$ along processor columns.
• Initially, each diagonal block $A_{i,i}$ is selected for broadcast.
!
Fox Algorithm
• Steps (repeated $\sqrt{p}$ times):
  1. Broadcast $A_{i,i}$ to all processors in the row.
  2. Multiply the block of $A$ received with the resident block of $B$.
  3. Send the block of $B$ up one step (with wraparound).
  4. Select block $A_{i,(j+1) \bmod \sqrt{p}}$ (where $A_{i,j}$ is the block broadcast in the previous step) and broadcast it to all processors in the row. Go to 2.
!
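Fox Algorithm (1)
• Initially broadcast the diagonal elements of $A$.

Block layout on the 4 × 4 processor grid (the $A$ block shown is the one broadcast along that processor row; $B$ is in its initial position):

| Processor row | Broadcast block | Resident $B$ blocks (columns 0–3) |
|---|---|---|
| 0 | $A_{00}$ | $B_{00}\quad B_{01}\quad B_{02}\quad B_{03}$ |
| 1 | $A_{11}$ | $B_{10}\quad B_{11}\quad B_{12}\quad B_{13}$ |
| 2 | $A_{22}$ | $B_{20}\quad B_{21}\quad B_{22}\quad B_{23}$ |
| 3 | $A_{33}$ | $B_{30}\quad B_{31}\quad B_{32}\quad B_{33}$ |

Block products computed in this step:

$$
\begin{array}{llll}
C_{00} = A_{00} B_{00} & C_{01} = A_{00} B_{01} & C_{02} = A_{00} B_{02} & C_{03} = A_{00} B_{03} \\
C_{10} = A_{11} B_{10} & C_{11} = A_{11} B_{11} & C_{12} = A_{11} B_{12} & C_{13} = A_{11} B_{13} \\
C_{20} = A_{22} B_{20} & C_{21} = A_{22} B_{21} & C_{22} = A_{22} B_{22} & C_{23} = A_{22} B_{23} \\
C_{30} = A_{33} B_{30} & C_{31} = A_{33} B_{31} & C_{32} = A_{33} B_{32} & C_{33} = A_{33} B_{33}
\end{array}
$$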
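Fox Algorithm (2)
• Broadcast the next element of $A$ in rows, shift $B$ in column and perform multiplication.

Block layout on the 4 × 4 processor grid ($B$ has been shifted up by one block row, with wraparound):

| Processor row | Broadcast block | Resident $B$ blocks (columns 0–3) |
|---|---|---|
| 0 | $A_{01}$ | $B_{10}\quad B_{11}\quad B_{12}\quad B_{13}$ |
| 1 | $A_{12}$ | $B_{20}\quad B_{21}\quad B_{22}\quad B_{23}$ |
| 2 | $A_{23}$ | $B_{30}\quad B_{31}\quad B_{32}\quad B_{33}$ |
| 3 | $A_{30}$ | $B_{00}\quad B_{01}\quad B_{02}\quad B_{03}$ |

Block products accumulated in this step:

$$
\begin{array}{llll}
C_{00} \mathrel{+}= A_{01} B_{10} & C_{01} \mathrel{+}= A_{01} B_{11} & C_{02} \mathrel{+}= A_{01} B_{12} & C_{03} \mathrel{+}= A_{01} B_{13} \\
C_{10} \mathrel{+}= A_{12} B_{20} & C_{11} \mathrel{+}= A_{12} B_{21} & C_{12} \mathrel{+}= A_{12} B_{22} & C_{13} \mathrel{+}= A_{12} B_{23} \\
C_{20} \mathrel{+}= A_{23} B_{30} & C_{21} \mathrel{+}= A_{23} B_{31} & C_{22} \mathrel{+}= A_{23} B_{32} & C_{23} \mathrel{+}= A_{23} B_{33} \\
C_{30} \mathrel{+}= A_{30} B_{00} & C_{31} \mathrel{+}= A_{30} B_{01} & C_{32} \mathrel{+}= A_{30} B_{02} & C_{33} \mathrel{+}= A_{30} B_{03}
\end{array}
$$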
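Fox Algorithm (3)
• Broadcast the next element of $A$ in rows, shift $B$ in column and perform multiplication.

Block layout on the 4 × 4 processor grid ($B$ has been shifted up by two block rows in total, with wraparound):

| Processor row | Broadcast block | Resident $B$ blocks (columns 0–3) |
|---|---|---|
| 0 | $A_{02}$ | $B_{20}\quad B_{21}\quad B_{22}\quad B_{23}$ |
| 1 | $A_{13}$ | $B_{30}\quad B_{31}\quad B_{32}\quad B_{33}$ |
| 2 | $A_{20}$ | $B_{00}\quad B_{01}\quad B_{02}\quad B_{03}$ |
| 3 | $A_{31}$ | $B_{10}\quad B_{11}\quad B_{12}\quad B_{13}$ |

Block products accumulated in this step:

$$
\begin{array}{llll}
C_{00} \mathrel{+}= A_{02} B_{20} & C_{01} \mathrel{+}= A_{02} B_{21} & C_{02} \mathrel{+}= A_{02} B_{22} & C_{03} \mathrel{+}= A_{02} B_{23} \\
C_{10} \mathrel{+}= A_{13} B_{30} & C_{11} \mathrel{+}= A_{13} B_{31} & C_{12} \mathrel{+}= A_{13} B_{32} & C_{13} \mathrel{+}= A_{13} B_{33} \\
C_{20} \mathrel{+}= A_{20} B_{00} & C_{21} \mathrel{+}= A_{20} B_{01} & C_{22} \mathrel{+}= A_{20} B_{02} & C_{23} \mathrel{+}= A_{20} B_{03} \\
C_{30} \mathrel{+}= A_{31} B_{10} & C_{31} \mathrel{+}= A_{31} B_{11} & C_{32} \mathrel{+}= A_{31} B_{12} & C_{33} \mathrel{+}= A_{31} B_{13}
\end{array}
$$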
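Fox Algorithm (4)
• Broadcast the next element of $A$ in rows, shift $B$ in column and perform multiplication.

Block layout on the 4 × 4 processor grid ($B$ has been shifted up by three block rows in total, with wraparound):

| Processor row | Broadcast block | Resident $B$ blocks (columns 0–3) |
|---|---|---|
| 0 | $A_{03}$ | $B_{30}\quad B_{31}\quad B_{32}\quad B_{33}$ |
| 1 | $A_{10}$ | $B_{00}\quad B_{01}\quad B_{02}\quad B_{03}$ |
| 2 | $A_{21}$ | $B_{10}\quad B_{11}\quad B_{12}\quad B_{13}$ |
| 3 | $A_{32}$ | $B_{20}\quad B_{21}\quad B_{22}\quad B_{23}$ |

Block products accumulated in this step:

$$
\begin{array}{llll}
C_{00} \mathrel{+}= A_{03} B_{30} & C_{01} \mathrel{+}= A_{03} B_{31} & C_{02} \mathrel{+}= A_{03} B_{32} & C_{03} \mathrel{+}= A_{03} B_{33} \\
C_{10} \mathrel{+}= A_{10} B_{00} & C_{11} \mathrel{+}= A_{10} B_{01} & C_{12} \mathrel{+}= A_{10} B_{02} & C_{13} \mathrel{+}= A_{10} B_{03} \\
C_{20} \mathrel{+}= A_{21} B_{10} & C_{21} \mathrel{+}= A_{21} B_{11} & C_{22} \mathrel{+}= A_{21} B_{12} & C_{23} \mathrel{+}= A_{21} B_{13} \\
C_{30} \mathrel{+}= A_{32} B_{20} & C_{31} \mathrel{+}= A_{32} B_{21} & C_{32} \mathrel{+}= A_{32} B_{22} & C_{33} \mathrel{+}= A_{32} B_{23}
\end{array}
$$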
H&I)12(%$3-)5J7 " 29%B#%-8'%=',D5$N'2#,*'#95' R