added tests
This commit is contained in:
@@ -0,0 +1,66 @@
|
||||
SHELL=/bin/sh
|
||||
BENCHMARK=bt
|
||||
BENCHMARKU=BT
|
||||
|
||||
include ../config/make.def
|
||||
include ../sys/make.common
|
||||
|
||||
SOURCES = bt.fdv \
|
||||
set_constants.fdv \
|
||||
initialize.fdv \
|
||||
exact_solution.fdv \
|
||||
verify.fdv \
|
||||
compute_errors.fdv \
|
||||
timers.fdv \
|
||||
print_result.fdv
|
||||
|
||||
SOURCES_MPI = z_solve_mpi.fdv y_solve_mpi.fdv x_solve_mpi.fdv compute_rhs_mpi.fdv exact_rhs.fdv
|
||||
SOURCES_SINGLE = z_solve.fdv y_solve.fdv x_solve.fdv compute_rhs.fdv exact_rhs.fdv
|
||||
SOURCES_BLOCK = z_solve_block.fdv y_solve_block.fdv x_solve_block.fdv compute_rhs_block.fdv exact_rhs_block.fdv
|
||||
SOURCES_BLOCK1 = z_solve_block.fdv y_solve_block.fdv x_solve_block.fdv compute_rhs.fdv exact_rhs.fdv
|
||||
SOURCES_BLOCK2 = z_solve_block.fdv y_solve_block.fdv x_solve_block.fdv compute_rhs_block2.fdv exact_rhs.fdv
|
||||
|
||||
OBJS = ${SOURCES:.fdv=.o}
|
||||
OBJS_SINGLE = ${SOURCES_SINGLE:.fdv=.o}
|
||||
OBJS_MPI = ${SOURCES_MPI:.fdv=.o}
|
||||
OBJS_BLOCK = ${SOURCES_BLOCK:.fdv=.o}
|
||||
OBJS_BLOCK1 = ${SOURCES_BLOCK1:.fdv=.o}
|
||||
OBJS_BLOCK2 = ${SOURCES_BLOCK2:.fdv=.o}
|
||||
|
||||
${PROGRAM}: config
|
||||
@if [ "$(VERSION)" = "MPI" ] ; then \
|
||||
${MAKE} MPI_VER; \
|
||||
else \
|
||||
if [ "$(VERSION)" = "BLOCK" ] ; then \
|
||||
${MAKE} BLOCK_VER; \
|
||||
else \
|
||||
if [ "$(VERSION)" = "BLOCK1" ] ; then \
|
||||
${MAKE} BLOCK_VER1; \
|
||||
else \
|
||||
${MAKE} SINGLE_VER;\
|
||||
fi \
|
||||
fi \
|
||||
fi
|
||||
|
||||
MPI_VER: $(OBJS) $(OBJS_MPI)
|
||||
${FLINK} -o ${PROGRAM} ${OBJS} $(OBJS_MPI)
|
||||
|
||||
SINGLE_VER: $(OBJS) $(OBJS_SINGLE)
|
||||
${FLINK} -o ${PROGRAM} ${OBJS} $(OBJS_SINGLE)
|
||||
|
||||
BLOCK_VER: $(OBJS) $(OBJS_BLOCK)
|
||||
${FLINK} -o ${PROGRAM} ${OBJS} $(OBJS_BLOCK)
|
||||
|
||||
BLOCK_VER1: $(OBJS) $(OBJS_BLOCK1)
|
||||
${FLINK} -o ${PROGRAM} ${OBJS} $(OBJS_BLOCK1)
|
||||
|
||||
BLOCK_VER2: $(OBJS) $(OBJS_BLOCK2)
|
||||
${FLINK} -o ${PROGRAM} ${OBJS} $(OBJS_BLOCK2)
|
||||
|
||||
%.o: %.fdv npbparams.h header3d.h
|
||||
${F77} ${FFLAGS} -c -o $@ $<
|
||||
|
||||
clean:
|
||||
rm -f npbparams.h
|
||||
rm -f *.o *~
|
||||
rm -f *.cu *.cuf *.c *.f
|
||||
@@ -0,0 +1,12 @@
|
||||
@echo off
|
||||
|
||||
set CLASS=%1
|
||||
set OPT=%2
|
||||
|
||||
CALL ..\sys\setparams BT %CLASS%
|
||||
CALL %F77% %OPT% bt 1>out_%CLASS%.txt 2>err_%CLASS%.txt
|
||||
if exist bt.exe (
|
||||
copy bt.exe %BIN%\bt.%CLASS%.x.exe
|
||||
del bt.exe
|
||||
)
|
||||
|
||||
@@ -0,0 +1,120 @@
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
program btdv3
|
||||
|
||||
include 'header3d.h'
|
||||
integer i,niter,step,fstatus,n3
|
||||
double precision navg,mflops
|
||||
external timer_read,verify
|
||||
double precision tmax,timer_read
|
||||
logical verified
|
||||
character class
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Root node reads input file (if it exists) else takes
|
||||
! defaults from parameters
|
||||
!---------------------------------------------------------------------
|
||||
write (unit = *,fmt = 1000)
|
||||
open (unit = 2,file = 'inputbt.data',status = 'old',iostat = fstat
|
||||
&us)
|
||||
if (fstatus .eq. 0) then
|
||||
write (unit = *,fmt = 233)
|
||||
233 format(' Reading from input file inputbt.data')
|
||||
read (unit = 2,fmt = *) niter
|
||||
read (unit = 2,fmt = *) dt
|
||||
read (unit = 2,fmt = *) grid_points(1),grid_points(2),grid_poin
|
||||
&ts(3)
|
||||
close (unit = 2)
|
||||
else
|
||||
write (unit = *,fmt = 234)
|
||||
niter = niter_default
|
||||
dt = dt_default
|
||||
grid_points(1) = problem_size
|
||||
grid_points(2) = problem_size
|
||||
grid_points(3) = problem_size
|
||||
endif
|
||||
234 format(' No input file inputbt.data. Using compiled defaults')
|
||||
write (unit = *,fmt = 1001) grid_points(1),grid_points(2),grid_poi
|
||||
&nts(3)
|
||||
write (unit = *,fmt = 1002) niter,dt
|
||||
1000 format(//, ' NAS Parallel Benchmarks 3.3.1 - DVMH version',' - BT
|
||||
&Benchmark ',/)
|
||||
1001 format(' Size: ', i3, 'x', i3, 'x', i3)
|
||||
1002 format(' Iterations: ', i3, ' dt: ', F10.6)
|
||||
if (grid_points(1) .gt. imax .or. grid_points(2) .gt. jmax .or. gr
|
||||
&id_points(3) .gt. kmax) then
|
||||
print *, (grid_points(i), i = 1,3)
|
||||
print *, ' Problem size too big for compiled array sizes'
|
||||
goto 999
|
||||
endif
|
||||
open (unit = 2,file = 'inputStage',status = 'old',iostat = fstat
|
||||
&us)
|
||||
if (fstatus .eq. 0) then
|
||||
read (unit = 2,fmt = *) stage_n
|
||||
close (unit = 2)
|
||||
else
|
||||
stage_n = 0
|
||||
endif
|
||||
write(*,*) 'stage = ', stage_n
|
||||
|
||||
call set_constants()
|
||||
call initialize()
|
||||
call exact_rhs()
|
||||
|
||||
! ************* DO 2 iterations for touch all code
|
||||
call adi_first
|
||||
call adi_first
|
||||
call initialize
|
||||
|
||||
call timer_clear(1)
|
||||
call timer_start(1)
|
||||
do step = 1,niter
|
||||
if (mod (step,20) .eq. 0 .or. step .eq. 1) then
|
||||
write (unit = *,fmt = 200) step
|
||||
200 format(' Time step ', i8)
|
||||
endif
|
||||
call adi()
|
||||
enddo
|
||||
call timer_stop(1)
|
||||
tmax = timer_read (1)
|
||||
call verify(niter,class,verified)
|
||||
n3 = grid_points(1) * grid_points(2) * grid_points(3)
|
||||
navg = (grid_points(1) + grid_points(2) + grid_points(3)) / 3.0
|
||||
if (tmax .ne. 0.) then
|
||||
mflops = 1.0e-6 * float (niter) * (3478.8 * float (n3) - 17655.
|
||||
&7 * navg** 2 + 28023.7 * navg) / tmax
|
||||
else
|
||||
mflops = 0.0
|
||||
endif
|
||||
call print_results('BT',class,grid_points(1),grid_points(2),grid_p
|
||||
&oints(3),niter,tmax,mflops,' floating point',verified,npb
|
||||
&version)
|
||||
|
||||
! ,compiletime, cs1, cs2, cs3, cs4, cs5,cs6, '(none)')
|
||||
999 continue
|
||||
end
|
||||
|
||||
subroutine adi_first()
|
||||
call compute_rhs()
|
||||
call x_solve()
|
||||
call y_solve()
|
||||
call z_solve()
|
||||
return
|
||||
end
|
||||
|
||||
subroutine adi ()
|
||||
|
||||
!DVM$ interval 1
|
||||
call compute_rhs()
|
||||
!DVM$ end interval
|
||||
!DVM$ interval 11
|
||||
call x_solve()
|
||||
!DVM$ end interval
|
||||
!DVM$ interval 12
|
||||
call y_solve()
|
||||
!DVM$ end interval
|
||||
!DVM$ interval 13
|
||||
call z_solve()
|
||||
!DVM$ end interval
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,117 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! this function computes the norm of the difference between the
|
||||
! computed solution and the exact solution
|
||||
!---------------------------------------------------------------------
|
||||
subroutine error_norm (rms)
|
||||
|
||||
include 'header3d.h'
|
||||
integer i,j,k,m,d
|
||||
double precision xi,eta,zeta,u_exact(5),rms(5),add
|
||||
double precision r1,r2,r3,r4,r5
|
||||
do m = 1,5
|
||||
rms(m) = 0.0d0
|
||||
enddo
|
||||
r1 = 0.0d0
|
||||
r2 = 0.0d0
|
||||
r3 = 0.0d0
|
||||
r4 = 0.0d0
|
||||
r5 = 0.0d0
|
||||
|
||||
!DVM$ region
|
||||
!DVM$ PARALLEL (k,j,i) ON u(*,i,j,k),
|
||||
!DVM$& REDUCTION(SUM(r1),SUM(r2),SUM(r3),SUM(r4),SUM(r5)),
|
||||
!DVM$&private(u_exact,xi,eta,zeta,m,add)
|
||||
do k = 0,problem_size - 1
|
||||
do j = 0,problem_size - 1
|
||||
do i = 0,problem_size - 1
|
||||
zeta = dble (k) * dnzm1
|
||||
eta = dble (j) * dnym1
|
||||
xi = dble (i) * dnxm1
|
||||
|
||||
! call exact_solution(xi, eta, zeta, u_exact)
|
||||
do m = 1,5
|
||||
u_exact(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) +
|
||||
& xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6
|
||||
&) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta *
|
||||
&(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
|
||||
enddo
|
||||
add = u(1,i,j,k) - u_exact(1)
|
||||
r1 = r1 + add * add
|
||||
add = u(2,i,j,k) - u_exact(2)
|
||||
r2 = r2 + add * add
|
||||
add = u(3,i,j,k) - u_exact(3)
|
||||
r3 = r3 + add * add
|
||||
add = u(4,i,j,k) - u_exact(4)
|
||||
r4 = r4 + add * add
|
||||
add = u(5,i,j,k) - u_exact(5)
|
||||
r5 = r5 + add * add
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
rms(1) = r1
|
||||
rms(2) = r2
|
||||
rms(3) = r3
|
||||
rms(4) = r4
|
||||
rms(5) = r5
|
||||
do m = 1,5
|
||||
do d = 1,3
|
||||
rms(m) = rms(m) / dble (grid_points(d) - 2)
|
||||
enddo
|
||||
rms(m) = dsqrt (rms(m))
|
||||
enddo
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine rhs_norm (rms)
|
||||
|
||||
include 'header3d.h'
|
||||
integer i,j,k,d,m
|
||||
double precision rms(5),add,r1,r2,r3,r4,r5
|
||||
r1 = 0.0d0
|
||||
r2 = 0.0d0
|
||||
r3 = 0.0d0
|
||||
r4 = 0.0d0
|
||||
r5 = 0.0d0
|
||||
|
||||
!DVM$ region
|
||||
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k),
|
||||
!DVM$&REDUCTION(SUM(r1),SUM(r2),SUM(r3),SUM(r4),SUM(r5)),
|
||||
!DVM$&private(add)
|
||||
do k = 1,problem_size - 2
|
||||
do j = 1,problem_size - 2
|
||||
do i = 1,problem_size - 2
|
||||
add = rhs(1,i,j,k)
|
||||
r1 = r1 + add * add
|
||||
add = rhs(2,i,j,k)
|
||||
r2 = r2 + add * add
|
||||
add = rhs(3,i,j,k)
|
||||
r3 = r3 + add * add
|
||||
add = rhs(4,i,j,k)
|
||||
r4 = r4 + add * add
|
||||
add = rhs(5,i,j,k)
|
||||
r5 = r5 + add * add
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
rms(1) = r1
|
||||
rms(2) = r2
|
||||
rms(3) = r3
|
||||
rms(4) = r4
|
||||
rms(5) = r5
|
||||
do m = 1,5
|
||||
do d = 1,3
|
||||
rms(m) = rms(m) / dble (grid_points(d) - 2)
|
||||
enddo
|
||||
rms(m) = dsqrt (rms(m))
|
||||
enddo
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,218 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine compute_rhs ()
|
||||
|
||||
include 'header3d.h'
|
||||
integer i,j,k,m
|
||||
double precision rho_inv,uijk,up1,um1,vijk,vp1,vm1,wijk,wp1,wm1,r
|
||||
&hs_(5)
|
||||
|
||||
!DVM$ region out(rho_i, us, vs, ws, qs, square)
|
||||
!DVM$ PARALLEL (k,j,i) ON us(i,j,k), SHADOW_COMPUTE,
|
||||
!DVM$& PRIVATE(rho_inv,m),cuda_block(128)
|
||||
do k = 0,problem_size - 1
|
||||
do j = 0,problem_size - 1
|
||||
do i = 0,problem_size - 1
|
||||
rho_inv = 1.0d0 / u(1,i,j,k)
|
||||
rho_i(i,j,k) = rho_inv
|
||||
us(i,j,k) = u(2,i,j,k) * rho_inv
|
||||
vs(i,j,k) = u(3,i,j,k) * rho_inv
|
||||
ws(i,j,k) = u(4,i,j,k) * rho_inv
|
||||
square(i,j,k) = 0.5d0 * (u(2,i,j,k) * u(2,i,j,k) + u(3,i,
|
||||
&j,k) * u(3,i,j,k) + u(4,i,j,k) * u(4,i,j,k)) * rho_inv
|
||||
qs(i,j,k) = square(i,j,k) * rho_inv
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = forcing(m,i,j,k)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! compute xi-direction fluxes
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), PRIVATE(uijk,up1,um1,m,
|
||||
!DVM$&vijk,vp1,vm1,wijk,wp1,wm1,rhs_),cuda_block(32)
|
||||
do k = 1,problem_size - 2
|
||||
do j = 1,problem_size - 2
|
||||
do i = 1,problem_size - 2
|
||||
uijk = us(i,j,k)
|
||||
up1 = us(i + 1,j,k)
|
||||
um1 = us(i - 1,j,k)
|
||||
rhs_(1) = forcing(1,i,j,k)
|
||||
rhs_(2) = forcing(2,i,j,k)
|
||||
rhs_(3) = forcing(3,i,j,k)
|
||||
rhs_(4) = forcing(4,i,j,k)
|
||||
rhs_(5) = forcing(5,i,j,k)
|
||||
|
||||
rhs_(1) = rhs_(1) + dx1tx1 * (u(1,i + 1,j,k) - 2.0d0 * u(
|
||||
&1,i,j,k) + u(1,i - 1,j,k)) - tx2 * (u(2,i + 1,j,k) - u(2,i - 1,j,k
|
||||
&))
|
||||
rhs_(2) = rhs_(2) + dx2tx1 * (u(2,i + 1,j,k) - 2.0d0 * u(
|
||||
&2,i,j,k) + u(2,i - 1,j,k)) + xxcon2 * con43 * (up1 - 2.0d0 * uijk
|
||||
&+ um1) - tx2 * (u(2,i + 1,j,k) * up1 - u(2,i - 1,j,k) * um1 + (u(5
|
||||
&,i + 1,j,k) - square(i + 1,j,k) - u(5,i - 1,j,k) + square(i - 1,j,
|
||||
&k)) * c2)
|
||||
rhs_(3) = rhs_(3) + dx3tx1 * (u(3,i + 1,j,k) - 2.0d0 * u(
|
||||
&3,i,j,k) + u(3,i - 1,j,k)) + xxcon2 * (vs(i + 1,j,k) - 2.0d0 * vs(
|
||||
&i,j,k) + vs(i - 1,j,k)) - tx2 * (u(3,i + 1,j,k) * up1 - u(3,i - 1,
|
||||
&j,k) * um1)
|
||||
rhs_(4) = rhs_(4) + dx4tx1 * (u(4,i + 1,j,k) - 2.0d0 * u(
|
||||
&4,i,j,k) + u(4,i - 1,j,k)) + xxcon2 * (ws(i + 1,j,k) - 2.0d0 * ws(
|
||||
&i,j,k) + ws(i - 1,j,k)) - tx2 * (u(4,i + 1,j,k) * up1 - u(4,i - 1,
|
||||
&j,k) * um1)
|
||||
rhs_(5) = rhs_(5) + dx5tx1 * (u(5,i + 1,j,k) - 2.0d0 * u(
|
||||
&5,i,j,k) + u(5,i - 1,j,k)) + xxcon3 * (qs(i + 1,j,k) - 2.0d0 * qs(
|
||||
&i,j,k) + qs(i - 1,j,k)) + xxcon4 * (up1 * up1 - 2.0d0 * uijk * uij
|
||||
&k + um1 * um1) + xxcon5 * (u(5,i + 1,j,k) * rho_i(i + 1,j,k) - 2.0
|
||||
&d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i - 1,j,k) * rho_i(i - 1,j,k)
|
||||
&) - tx2 * ((c1 * u(5,i + 1,j,k) - c2 * square(i + 1,j,k)) * up1 -
|
||||
&(c1 * u(5,i - 1,j,k) - c2 * square(i - 1,j,k)) * um1)
|
||||
if (i .eq. 1) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
|
||||
&0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k))
|
||||
enddo
|
||||
else if (i .eq. 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i - 1,
|
||||
&j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k
|
||||
&))
|
||||
enddo
|
||||
else if (i .ge. 3 .and. i .le. problem_size - 4) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
|
||||
&* u(m,i - 1,j,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m
|
||||
&,i + 2,j,k))
|
||||
enddo
|
||||
else if (i .eq. problem_size - 3) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
|
||||
&* u(m,i - 1,j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k))
|
||||
enddo
|
||||
else if (i .eq. problem_size - 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.d0 *
|
||||
& u(m,i - 1,j,k) + 5.d0 * u(m,i,j,k))
|
||||
enddo
|
||||
endif
|
||||
vijk = vs(i,j,k)
|
||||
vp1 = vs(i,j + 1,k)
|
||||
vm1 = vs(i,j - 1,k)
|
||||
rhs_(1) = rhs_(1) + dy1ty1 * (u(1,i,j + 1,k) - 2.0d0 * u(
|
||||
&1,i,j,k) + u(1,i,j - 1,k)) - ty2 * (u(3,i,j + 1,k) - u(3,i,j - 1,k
|
||||
&))
|
||||
rhs_(2) = rhs_(2) + dy2ty1 * (u(2,i,j + 1,k) - 2.0d0 * u(
|
||||
&2,i,j,k) + u(2,i,j - 1,k)) + yycon2 * (us(i,j + 1,k) - 2.0d0 * us(
|
||||
&i,j,k) + us(i,j - 1,k)) - ty2 * (u(2,i,j + 1,k) * vp1 - u(2,i,j -
|
||||
&1,k) * vm1)
|
||||
rhs_(3) = rhs_(3) + dy3ty1 * (u(3,i,j + 1,k) - 2.0d0 * u(
|
||||
&3,i,j,k) + u(3,i,j - 1,k)) + yycon2 * con43 * (vp1 - 2.0d0 * vijk
|
||||
&+ vm1) - ty2 * (u(3,i,j + 1,k) * vp1 - u(3,i,j - 1,k) * vm1 + (u(5
|
||||
&,i,j + 1,k) - square(i,j + 1,k) - u(5,i,j - 1,k) + square(i,j - 1,
|
||||
&k)) * c2)
|
||||
rhs_(4) = rhs_(4) + dy4ty1 * (u(4,i,j + 1,k) - 2.0d0 * u(
|
||||
&4,i,j,k) + u(4,i,j - 1,k)) + yycon2 * (ws(i,j + 1,k) - 2.0d0 * ws(
|
||||
&i,j,k) + ws(i,j - 1,k)) - ty2 * (u(4,i,j + 1,k) * vp1 - u(4,i,j -
|
||||
&1,k) * vm1)
|
||||
rhs_(5) = rhs_(5) + dy5ty1 * (u(5,i,j + 1,k) - 2.0d0 * u(
|
||||
&5,i,j,k) + u(5,i,j - 1,k)) + yycon3 * (qs(i,j + 1,k) - 2.0d0 * qs(
|
||||
&i,j,k) + qs(i,j - 1,k)) + yycon4 * (vp1 * vp1 - 2.0d0 * vijk * vij
|
||||
&k + vm1 * vm1) + yycon5 * (u(5,i,j + 1,k) * rho_i(i,j + 1,k) - 2.0
|
||||
&d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i,j - 1,k) * rho_i(i,j - 1,k)
|
||||
&) - ty2 * ((c1 * u(5,i,j + 1,k) - c2 * square(i,j + 1,k)) * vp1 -
|
||||
&(c1 * u(5,i,j - 1,k) - c2 * square(i,j - 1,k)) * vm1)
|
||||
if (j .eq. 1) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
|
||||
&0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k))
|
||||
enddo
|
||||
else if (j .eq. 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j -
|
||||
&1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k
|
||||
&))
|
||||
enddo
|
||||
else if (j .ge. 3 .and. j .le. problem_size - 4) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
|
||||
&* u(m,i,j - 1,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m
|
||||
&,i,j + 2,k))
|
||||
enddo
|
||||
else if (j .eq. problem_size - 3) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
|
||||
&* u(m,i,j - 1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k))
|
||||
enddo
|
||||
else if (j .eq. problem_size - 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.d0 *
|
||||
& u(m,i,j - 1,k) + 5.d0 * u(m,i,j,k))
|
||||
enddo
|
||||
endif
|
||||
wijk = ws(i,j,k)
|
||||
wp1 = ws(i,j,k + 1)
|
||||
wm1 = ws(i,j,k - 1)
|
||||
rhs_(1) = rhs_(1) + dz1tz1 * (u(1,i,j,k + 1) - 2.0d0 * u(
|
||||
&1,i,j,k) + u(1,i,j,k - 1)) - tz2 * (u(4,i,j,k + 1) - u(4,i,j,k - 1
|
||||
&))
|
||||
rhs_(2) = rhs_(2) + dz2tz1 * (u(2,i,j,k + 1) - 2.0d0 * u(
|
||||
&2,i,j,k) + u(2,i,j,k - 1)) + zzcon2 * (us(i,j,k + 1) - 2.0d0 * us(
|
||||
&i,j,k) + us(i,j,k - 1)) - tz2 * (u(2,i,j,k + 1) * wp1 - u(2,i,j,k
|
||||
&- 1) * wm1)
|
||||
rhs_(3) = rhs_(3) + dz3tz1 * (u(3,i,j,k + 1) - 2.0d0 * u(
|
||||
&3,i,j,k) + u(3,i,j,k - 1)) + zzcon2 * (vs(i,j,k + 1) - 2.0d0 * vs(
|
||||
&i,j,k) + vs(i,j,k - 1)) - tz2 * (u(3,i,j,k + 1) * wp1 - u(3,i,j,k
|
||||
&- 1) * wm1)
|
||||
rhs_(4) = rhs_(4) + dz4tz1 * (u(4,i,j,k + 1) - 2.0d0 * u(
|
||||
&4,i,j,k) + u(4,i,j,k - 1)) + zzcon2 * con43 * (wp1 - 2.0d0 * wijk
|
||||
&+ wm1) - tz2 * (u(4,i,j,k + 1) * wp1 - u(4,i,j,k - 1) * wm1 + (u(5
|
||||
&,i,j,k + 1) - square(i,j,k + 1) - u(5,i,j,k - 1) + square(i,j,k -
|
||||
&1)) * c2)
|
||||
rhs_(5) = rhs_(5) + dz5tz1 * (u(5,i,j,k + 1) - 2.0d0 * u(
|
||||
&5,i,j,k) + u(5,i,j,k - 1)) + zzcon3 * (qs(i,j,k + 1) - 2.0d0 * qs(
|
||||
&i,j,k) + qs(i,j,k - 1)) + zzcon4 * (wp1 * wp1 - 2.0d0 * wijk * wij
|
||||
&k + wm1 * wm1) + zzcon5 * (u(5,i,j,k + 1) * rho_i(i,j,k + 1) - 2.0
|
||||
&d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i,j,k - 1) * rho_i(i,j,k - 1)
|
||||
&) - tz2 * ((c1 * u(5,i,j,k + 1) - c2 * square(i,j,k + 1)) * wp1 -
|
||||
&(c1 * u(5,i,j,k - 1) - c2 * square(i,j,k - 1)) * wm1)
|
||||
if (k .eq. 1) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
|
||||
&0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2))
|
||||
enddo
|
||||
else if (k .eq. 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j,k
|
||||
&- 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2
|
||||
&))
|
||||
enddo
|
||||
else if (k .ge. 3 .and. k .le. problem_size - 4) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
|
||||
&* u(m,i,j,k - 1) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m
|
||||
&,i,j,k + 2))
|
||||
enddo
|
||||
else if (k .eq. problem_size - 3) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
|
||||
&* u(m,i,j,k - 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1))
|
||||
enddo
|
||||
else if (k .eq. problem_size - 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.d0 *
|
||||
& u(m,i,j,k - 1) + 5.d0 * u(m,i,j,k))
|
||||
enddo
|
||||
endif
|
||||
rhs(1,i,j,k) = rhs_(1) * dt
|
||||
rhs(2,i,j,k) = rhs_(2) * dt
|
||||
rhs(3,i,j,k) = rhs_(3) * dt
|
||||
rhs(4,i,j,k) = rhs_(4) * dt
|
||||
rhs(5,i,j,k) = rhs_(5) * dt
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,484 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine compute_rhs ()
|
||||
|
||||
include 'header3d.h'
|
||||
integer i,j,k,m,z
|
||||
double precision rho_inv,uijk,up1,um1,vijk,vp1,vm1,wijk,wp1,wm1,r
|
||||
&hs_(5),s1,s2,s3,s4,s5,s6,s7,qs1,qs2,qs3,qs4,qs5,qs6,qs7
|
||||
double precision ue_(-2:2, 5), buf_(-2:2, 5),cuf_(-2:2),q_(-2:2)
|
||||
double precision dtemp(5), xi, eta, zeta, dtpp
|
||||
!DVM$ region
|
||||
!DVM$ PARALLEL (k,j,i) ON us(i,j,k),PRIVATE(m),cuda_block(128)
|
||||
do k = 0,problem_size - 1
|
||||
do j = 0,problem_size - 1
|
||||
do i = 0,problem_size - 1
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = 0
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! compute xi-direction fluxes
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), PRIVATE(uijk,up1,um1,m,
|
||||
!DVM$&vijk,vp1,vm1,wijk,wp1,wm1,rhs_,s1,s2,s3,s4,s5,s6,s7,
|
||||
!DVM$&qs1,qs2,qs3,qs4,qs5,qs6,qs7,
|
||||
!DVM$&zeta,eta,xi,dtemp,buf_,cuf_,q_,dtpp,z,ue_),cuda_block(32)
|
||||
do k = 1,problem_size - 2
|
||||
do j = 1,problem_size - 2
|
||||
do i = 1,problem_size - 2
|
||||
rhs_(1) = 0
|
||||
rhs_(2) = 0
|
||||
rhs_(3) = 0
|
||||
rhs_(4) = 0
|
||||
rhs_(5) = 0
|
||||
|
||||
zeta = dble(k) * dnzm1
|
||||
eta = dble(j) * dnym1
|
||||
do z = -2, 2
|
||||
xi = dble(i + z) * dnxm1
|
||||
do m = 1, 5
|
||||
dtemp(m) = ce(m,1) +
|
||||
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
|
||||
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
|
||||
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
do m = 1, 5
|
||||
ue_(z,m) = dtemp(m)
|
||||
end do
|
||||
dtpp = 1.0d0 / dtemp(1)
|
||||
do m = 2, 5
|
||||
buf_(z, m) = dtpp * dtemp(m)
|
||||
end do
|
||||
|
||||
cuf_(z) = buf_(z,2) * buf_(z,2)
|
||||
buf_(z,1) = cuf_(z) + buf_(z,3) * buf_(z,3) +
|
||||
> buf_(z,4) * buf_(z,4)
|
||||
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)*
|
||||
> ue_(z,3) + buf_(z,4)*ue_(z,4))
|
||||
enddo
|
||||
|
||||
rhs_(1) = rhs_(1) -
|
||||
> tx2*( ue_(1,2)-ue_(-1,2) )+
|
||||
> dx1tx1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
|
||||
|
||||
rhs_(2) = rhs_(2) - tx2 * (
|
||||
> (ue_(1,2)*buf_(1,2)+c2*(ue_(1,5)-q_(1)))-
|
||||
> (ue_(-1,2)*buf_(-1,2)+c2*(ue_(-1,5)-q_(-1))))+
|
||||
> xxcon1*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
|
||||
> dx2tx1*( ue_(1,2)-2.0d0* ue_(0,2)+ue_(-1,2))
|
||||
|
||||
rhs_(3) = rhs_(3) - tx2 * (
|
||||
> ue_(1,3)*buf_(1,2)-ue_(-1,3)*buf_(-1,2))+
|
||||
> xxcon2*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
|
||||
> dx3tx1*( ue_(1,3)-2.0d0*ue_(0,3) +ue_(-1,3))
|
||||
|
||||
rhs_(4) = rhs_(4) - tx2*(
|
||||
> ue_(1,4)*buf_(1,2)-ue_(-1,4)*buf_(-1,2))+
|
||||
> xxcon2*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
|
||||
> dx4tx1*( ue_(1,4)-2.0d0* ue_(0,4)+ ue_(-1,4))
|
||||
|
||||
rhs_(5) = rhs_(5) - tx2*(
|
||||
> buf_(1,2)*(c1*ue_(1,5)-c2*q_(1))-
|
||||
> buf_(-1,2)*(c1*ue_(-1,5)-c2*q_(-1)))+
|
||||
> 0.5d0*xxcon3*(buf_(1,1)-2.0d0*buf_(0,1)+
|
||||
> buf_(-1,1))+
|
||||
> xxcon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
|
||||
> xxcon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
|
||||
> dx5tx1*( ue_(1,5)-2.0d0* ue_(0,5)+ ue_(-1,5))
|
||||
do m = 1, 5
|
||||
if(i .eq. 1) then
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
|
||||
else if(i .eq. 2) then
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
|
||||
> 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
else if(i .eq. problem_size-3) then
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
|
||||
else if(i .eq. problem_size-2) then
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
|
||||
else
|
||||
rhs_(m) = rhs_(m) - dssp*
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
endif
|
||||
end do
|
||||
|
||||
zeta = dble(k) * dnzm1
|
||||
xi = dble(i) * dnxm1
|
||||
do z = -2, 2
|
||||
eta = dble(j + z) * dnym1
|
||||
do m = 1, 5
|
||||
dtemp(m) = ce(m,1) +
|
||||
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
|
||||
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
|
||||
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
do m = 1, 5
|
||||
ue_(z,m) = dtemp(m)
|
||||
end do
|
||||
dtpp = 1.0d0 / dtemp(1)
|
||||
do m = 2, 5
|
||||
buf_(z, m) = dtpp * dtemp(m)
|
||||
end do
|
||||
|
||||
cuf_(z) = buf_(z,3) * buf_(z,3)
|
||||
buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) +
|
||||
> buf_(z,4) * buf_(z,4)
|
||||
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)
|
||||
> *ue_(z,3) + buf_(z,4) * ue_(z,4))
|
||||
enddo
|
||||
|
||||
rhs_(1) = rhs_(1) -
|
||||
> ty2*( ue_(1,3)-ue_(-1,3) )+
|
||||
> dy1ty1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
|
||||
|
||||
rhs_(2) = rhs_(2) - ty2*(
|
||||
> ue_(1,2)*buf_(1,3)-ue_(-1,2)*buf_(-1,3))+
|
||||
> yycon2*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
|
||||
> dy2ty1*( ue_(1,2)-2.0* ue_(0,2)+ ue_(-1,2))
|
||||
|
||||
rhs_(3) = rhs_(3) - ty2*(
|
||||
> (ue_(1,3)*buf_(1,3)+c2*(ue_(1,5)-q_(1)))-
|
||||
> (ue_(-1,3)*buf_(-1,3)+c2*(ue_(-1,5)-q_(-1))))+
|
||||
> yycon1*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
|
||||
> dy3ty1*( ue_(1,3)-2.0d0*ue_(0,3) +ue_(-1,3))
|
||||
|
||||
rhs_(4) = rhs_(4) - ty2*(
|
||||
> ue_(1,4)*buf_(1,3)-ue_(-1,4)*buf_(-1,3))+
|
||||
> yycon2*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
|
||||
> dy4ty1*( ue_(1,4)-2.0d0*ue_(0,4)+ ue_(-1,4))
|
||||
|
||||
rhs_(5) = rhs_(5) - ty2*(
|
||||
> buf_(1,3)*(c1*ue_(1,5)-c2*q_(1))-
|
||||
> buf_(-1,3)*(c1*ue_(-1,5)-c2*q_(-1)))+
|
||||
> 0.5d0*yycon3*(buf_(1,1)-2.0d0*buf_(0,1)+
|
||||
> buf_(-1,1))+
|
||||
> yycon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
|
||||
> yycon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
|
||||
> dy5ty1*(ue_(1,5)-2.0d0*ue_(0,5)+ue_(-1,5))
|
||||
do m = 1, 5
|
||||
if(j .eq. 1) then
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
|
||||
else if(j .eq. 2) then
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
|
||||
> 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
else if(j .eq. problem_size-3) then
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
|
||||
else if(j .eq. problem_size-2) then
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
|
||||
else
|
||||
rhs_(m) = rhs_(m) - dssp*
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
endif
|
||||
end do
|
||||
|
||||
xi = dble(i) * dnxm1
|
||||
eta = dble(j) * dnym1
|
||||
do z = -2, 2
|
||||
zeta = dble(k + z) * dnzm1
|
||||
do m = 1, 5
|
||||
dtemp(m) = ce(m,1) +
|
||||
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
|
||||
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
|
||||
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
do m = 1, 5
|
||||
ue_(z,m) = dtemp(m)
|
||||
end do
|
||||
dtpp = 1.0d0 / dtemp(1)
|
||||
do m = 2, 5
|
||||
buf_(z, m) = dtpp * dtemp(m)
|
||||
end do
|
||||
|
||||
cuf_(z) = buf_(z,4) * buf_(z,4)
|
||||
buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) +
|
||||
> buf_(z,3) * buf_(z,3)
|
||||
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)*
|
||||
> ue_(z,3) + buf_(z,4)*ue_(z,4))
|
||||
enddo
|
||||
|
||||
rhs_(1) = rhs_(1) -
|
||||
> tz2*( ue_(1,4)-ue_(-1,4) )+
|
||||
> dz1tz1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
|
||||
|
||||
rhs_(2) = rhs_(2) - tz2 * (
|
||||
> ue_(1,2)*buf_(1,4)-ue_(-1,2)*buf_(-1,4))+
|
||||
> zzcon2*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
|
||||
> dz2tz1*( ue_(1,2)-2.0d0* ue_(0,2)+ ue_(-1,2))
|
||||
|
||||
rhs_(3) = rhs_(3) - tz2 * (
|
||||
> ue_(1,3)*buf_(1,4)-ue_(-1,3)*buf_(-1,4))+
|
||||
> zzcon2*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
|
||||
> dz3tz1*(ue_(1,3)-2.0d0*ue_(0,3)+ue_(-1,3))
|
||||
|
||||
rhs_(4) = rhs_(4) - tz2 * (
|
||||
> (ue_(1,4)*buf_(1,4)+c2*(ue_(1,5)-q_(1)))-
|
||||
> (ue_(-1,4)*buf_(-1,4)+c2*(ue_(-1,5)-q_(-1))))+
|
||||
> zzcon1*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
|
||||
> dz4tz1*( ue_(1,4)-2.0d0*ue_(0,4) +ue_(-1,4))
|
||||
|
||||
rhs_(5) = rhs_(5) - tz2 * (
|
||||
> buf_(1,4)*(c1*ue_(1,5)-c2*q_(1))-
|
||||
> buf_(-1,4)*(c1*ue_(-1,5)-c2*q_(-1)))+
|
||||
> 0.5d0*zzcon3*(buf_(1,1)-2.0d0*buf_(0,1)
|
||||
> +buf_(-1,1))+
|
||||
> zzcon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
|
||||
> zzcon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
|
||||
> dz5tz1*( ue_(1,5)-2.0d0*ue_(0,5)+ ue_(-1,5))
|
||||
do m = 1, 5
|
||||
if(k .eq. 1) then
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
|
||||
else if(k .eq. 2) then
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
|
||||
> 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
else if(k .eq. problem_size-3) then
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
|
||||
else if(k .eq. problem_size-2) then
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
|
||||
else
|
||||
rhs_(m) = rhs_(m) - dssp*
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
endif
|
||||
end do
|
||||
|
||||
do m = 1, 5
|
||||
rhs_(m) = -1.d0 * rhs_(m)
|
||||
end do
|
||||
|
||||
uijk = u(2,i,j,k) / u(1,i,j,k)
|
||||
up1 = u(2,i + 1,j,k) / u(1,i + 1,j,k)
|
||||
um1 = u(2,i - 1,j,k) / u(1,i - 1,j,k)
|
||||
|
||||
vijk = u(3,i,j,k) / u(1,i,j,k)
|
||||
vp1 = u(3,i,j + 1,k) / u(1,i,j + 1,k)
|
||||
vm1 = u(3,i,j - 1,k) / u(1,i,j - 1,k)
|
||||
|
||||
wijk = u(4,i,j,k) / u(1,i,j,k)
|
||||
wp1 = u(4,i,j,k + 1) / u(1,i,j,k + 1)
|
||||
wm1 = u(4,i,j,k - 1) / u(1,i,j,k - 1)
|
||||
|
||||
s1 = 0.5d0 * (u(2,i,j,k) * u(2,i,j,k) + u(3,i,
|
||||
&j,k) * u(3,i,j,k) + u(4,i,j,k) * u(4,i,j,k)) / u(1,i,j,k)
|
||||
s2 = 0.5d0 * (u(2,i+1,j,k) * u(2,i+1,j,k) + u(3,i
|
||||
&+1,j,k) * u(3,i+1,j,k) + u(4,i+1,j,k) * u(4,i+1,j,k)) /
|
||||
&u(1,i+1,j,k)
|
||||
s3 = 0.5d0 * (u(2,i-1,j,k) * u(2,i-1,j,k) + u(3,i
|
||||
&-1,j,k) * u(3,i-1,j,k) + u(4,i-1,j,k) * u(4,i-1,j,k)) /
|
||||
&u(1,i-1,j,k)
|
||||
s4 = 0.5d0 * (u(2,i,j+1,k) * u(2,i,j+1,k) + u(3,i,
|
||||
&j+1,k) * u(3,i,j+1,k) + u(4,i,j+1,k) * u(4,i,j+1,k)) /
|
||||
&u(1,i,j+1,k)
|
||||
s5 = 0.5d0 * (u(2,i,j-1,k) * u(2,i,j-1,k) + u(3,i,
|
||||
&j-1,k) * u(3,i,j-1,k) + u(4,i,j-1,k) * u(4,i,j-1,k)) /
|
||||
&u(1,i,j-1,k)
|
||||
s6 = 0.5d0 * (u(2,i,j,k+1) * u(2,i,j,k+1) + u(3,i,
|
||||
&j,k+1) * u(3,i,j,k+1) + u(4,i,j,k+1) * u(4,i,j,k+1)) /
|
||||
&u(1,i,j,k+1)
|
||||
s7 = 0.5d0 * (u(2,i,j,k-1) * u(2,i,j,k-1) + u(3,i,
|
||||
&j,k-1) * u(3,i,j,k-1) + u(4,i,j,k-1) * u(4,i,j,k-1)) /
|
||||
&u(1,i,j,k-1)
|
||||
|
||||
qs1 = s1 / u(1,i,j,k)
|
||||
qs2 = s2 / u(1,i+1,j,k)
|
||||
qs3 = s3 / u(1,i-1,j,k)
|
||||
qs4 = s4 / u(1,i,j+1,k)
|
||||
qs5 = s5 / u(1,i,j-1,k)
|
||||
qs6 = s6 / u(1,i,j,k+1)
|
||||
qs7 = s7 / u(1,i,j,k-1)
|
||||
|
||||
! rhs_(1) = forcing(1,i,j,k)
|
||||
! rhs_(2) = forcing(2,i,j,k)
|
||||
! rhs_(3) = forcing(3,i,j,k)
|
||||
! rhs_(4) = forcing(4,i,j,k)
|
||||
! rhs_(5) = forcing(5,i,j,k)
|
||||
|
||||
rhs_(1) = rhs_(1) + dx1tx1 * (u(1,i + 1,j,k) - 2.0d0 * u(
|
||||
&1,i,j,k) + u(1,i - 1,j,k)) - tx2 * (u(2,i + 1,j,k) - u(2,i - 1,j,k
|
||||
&))
|
||||
rhs_(2) = rhs_(2) + dx2tx1 * (u(2,i + 1,j,k) - 2.0d0 * u(
|
||||
&2,i,j,k) + u(2,i - 1,j,k)) + xxcon2 * con43 * (up1 - 2.0d0 * uijk
|
||||
&+ um1) - tx2 * (u(2,i + 1,j,k) * up1 - u(2,i - 1,j,k) * um1 + (u(5
|
||||
&,i + 1,j,k) - s2 - u(5,i - 1,j,k) + s3) * c2)
|
||||
rhs_(3) = rhs_(3) + dx3tx1 * (u(3,i + 1,j,k) - 2.0d0 * u(
|
||||
&3,i,j,k) + u(3,i - 1,j,k)) + xxcon2 * (u(3,i + 1,j,k) /
|
||||
&u(1,i + 1,j,k) - 2.0d0 * vijk + u(3,i - 1,j,k)/u(1,i - 1,j,k))
|
||||
&- tx2 * (u(3,i + 1,j,k) * up1 - u(3,i - 1,j,k) * um1)
|
||||
rhs_(4) = rhs_(4) + dx4tx1 * (u(4,i + 1,j,k) - 2.0d0 * u(
|
||||
&4,i,j,k) + u(4,i - 1,j,k)) + xxcon2 * (u(4,i + 1,j,k) /
|
||||
&u(1,i + 1,j,k) - 2.0d0 * wijk + u(4,i - 1,j,k) / u(1,i - 1,j,k))
|
||||
&- tx2 * (u(4,i + 1,j,k) * up1 - u(4,i - 1,j,k) * um1)
|
||||
rhs_(5) = rhs_(5) + dx5tx1 * (u(5,i + 1,j,k) - 2.0d0 * u(
|
||||
&5,i,j,k) + u(5,i - 1,j,k)) + xxcon3 * (qs2 - 2.0d0 * qs1 +
|
||||
&qs3) + xxcon4 * (up1 * up1 - 2.0d0 * uijk * uij
|
||||
&k + um1 * um1) + xxcon5 * (u(5,i + 1,j,k) * 1.0d0 / u(1,i + 1,j,k)
|
||||
& - 2.0d0 * u(5,i,j,k) * 1.0d0 / u(1,i,j,k) + u(5,i - 1,j,k) *
|
||||
&1.0d0 / u(1,i - 1,j,k)) - tx2 * ((c1 * u(5,i + 1,j,k) -
|
||||
&c2 * s2) * up1 -
|
||||
&(c1 * u(5,i - 1,j,k) - c2 * s3) * um1)
|
||||
if (i .eq. 1) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
|
||||
&0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k))
|
||||
enddo
|
||||
else if (i .eq. 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i - 1,
|
||||
&j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k
|
||||
&))
|
||||
enddo
|
||||
else if (i .ge. 3 .and. i .le. problem_size - 4) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
|
||||
&* u(m,i - 1,j,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m
|
||||
&,i + 2,j,k))
|
||||
enddo
|
||||
else if (i .eq. problem_size - 3) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
|
||||
&* u(m,i - 1,j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k))
|
||||
enddo
|
||||
else if (i .eq. problem_size - 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.d0 *
|
||||
& u(m,i - 1,j,k) + 5.d0 * u(m,i,j,k))
|
||||
enddo
|
||||
endif
|
||||
|
||||
rhs_(1) = rhs_(1) + dy1ty1 * (u(1,i,j + 1,k) - 2.0d0 * u(
|
||||
&1,i,j,k) + u(1,i,j - 1,k)) - ty2 * (u(3,i,j + 1,k) - u(3,i,j - 1,k
|
||||
&))
|
||||
rhs_(2) = rhs_(2) + dy2ty1 * (u(2,i,j + 1,k) - 2.0d0 * u(
|
||||
&2,i,j,k) + u(2,i,j - 1,k)) + yycon2 * (u(2,i,j + 1,k) /
|
||||
&u(1,i,j + 1,k) - 2.0d0 * uijk + u(2,i,j - 1,k)/u(1,i,j - 1,k))
|
||||
&- ty2 * (u(2,i,j + 1,k) * vp1 - u(2,i,j -1,k) * vm1)
|
||||
rhs_(3) = rhs_(3) + dy3ty1 * (u(3,i,j + 1,k) - 2.0d0 * u(
|
||||
&3,i,j,k) + u(3,i,j - 1,k)) + yycon2 * con43 * (vp1 - 2.0d0 * vijk
|
||||
&+ vm1) - ty2 * (u(3,i,j + 1,k) * vp1 - u(3,i,j - 1,k) * vm1 + (u(5
|
||||
&,i,j + 1,k) - s4 - u(5,i,j - 1,k) + s5) * c2)
|
||||
rhs_(4) = rhs_(4) + dy4ty1 * (u(4,i,j + 1,k) - 2.0d0 * u(
|
||||
&4,i,j,k) + u(4,i,j - 1,k)) + yycon2 * (u(4,i,j + 1,k) /
|
||||
&u(1,i,j + 1,k) - 2.0d0 * wijk + u(4,i,j - 1,k) / u(1,i,j - 1,k))
|
||||
&- ty2 * (u(4,i,j + 1,k) * vp1 - u(4,i,j - 1,k) * vm1)
|
||||
rhs_(5) = rhs_(5) + dy5ty1 * (u(5,i,j + 1,k) - 2.0d0 * u(
|
||||
&5,i,j,k) + u(5,i,j - 1,k)) + yycon3 * (qs4 - 2.0d0 * qs1
|
||||
& + qs5) + yycon4 * (vp1 * vp1 - 2.0d0 * vijk * vij
|
||||
&k + vm1 * vm1) + yycon5 * (u(5,i,j + 1,k) * 1.0d0 / u(1,i,j + 1,k)
|
||||
&- 2.0d0 * u(5,i,j,k) * 1.0d0 / u(1,i,j,k) + u(5,i,j - 1,k) *
|
||||
&1.0d0 / u(1,i,j - 1,k)
|
||||
&) - ty2 * ((c1 * u(5,i,j + 1,k) - c2 * s4) * vp1 -
|
||||
&(c1 * u(5,i,j - 1,k) - c2 * s5) * vm1)
|
||||
if (j .eq. 1) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
|
||||
&0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k))
|
||||
enddo
|
||||
else if (j .eq. 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j -
|
||||
&1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k
|
||||
&))
|
||||
enddo
|
||||
else if (j .ge. 3 .and. j .le. problem_size - 4) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
|
||||
&* u(m,i,j - 1,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m
|
||||
&,i,j + 2,k))
|
||||
enddo
|
||||
else if (j .eq. problem_size - 3) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
|
||||
&* u(m,i,j - 1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k))
|
||||
enddo
|
||||
else if (j .eq. problem_size - 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.d0 *
|
||||
& u(m,i,j - 1,k) + 5.d0 * u(m,i,j,k))
|
||||
enddo
|
||||
endif
|
||||
|
||||
rhs_(1) = rhs_(1) + dz1tz1 * (u(1,i,j,k + 1) - 2.0d0 * u(
|
||||
&1,i,j,k) + u(1,i,j,k - 1)) - tz2 * (u(4,i,j,k + 1) - u(4,i,j,k - 1
|
||||
&))
|
||||
rhs_(2) = rhs_(2) + dz2tz1 * (u(2,i,j,k + 1) - 2.0d0 * u(
|
||||
&2,i,j,k) + u(2,i,j,k - 1)) + zzcon2 * (u(2,i,j,k + 1) /
|
||||
&u(1,i,j,k + 1) - 2.0d0 * uijk + u(2,i,j,k - 1) / u(1,i,j,k - 1))
|
||||
&- tz2 * (u(2,i,j,k + 1) * wp1 - u(2,i,j,k - 1) * wm1)
|
||||
rhs_(3) = rhs_(3) + dz3tz1 * (u(3,i,j,k + 1) - 2.0d0 * u(
|
||||
&3,i,j,k) + u(3,i,j,k - 1)) + zzcon2 * (u(3,i,j,k + 1) /
|
||||
&u(1,i,j,k + 1) - 2.0d0 * vijk + u(3,i,j,k - 1) / u(1,i,j,k - 1))
|
||||
&- tz2 * (u(3,i,j,k + 1) * wp1 - u(3,i,j,k - 1) * wm1)
|
||||
rhs_(4) = rhs_(4) + dz4tz1 * (u(4,i,j,k + 1) - 2.0d0 * u(
|
||||
&4,i,j,k) + u(4,i,j,k - 1)) + zzcon2 * con43 * (wp1 - 2.0d0 * wijk
|
||||
&+ wm1) - tz2 * (u(4,i,j,k + 1) * wp1 - u(4,i,j,k - 1) * wm1 + (u(5
|
||||
&,i,j,k + 1) - s6 - u(5,i,j,k - 1) + s7) * c2)
|
||||
rhs_(5) = rhs_(5) + dz5tz1 * (u(5,i,j,k + 1) - 2.0d0 * u(
|
||||
&5,i,j,k) + u(5,i,j,k - 1)) + zzcon3 * (qs6 - 2.0d0 * qs1 +
|
||||
&qs7) + zzcon4 * (wp1 * wp1 - 2.0d0 * wijk * wij
|
||||
&k + wm1 * wm1) + zzcon5 * (u(5,i,j,k + 1) * 1.0d0 / u(1,i,j,k+1)
|
||||
&- 2.0d0 * u(5,i,j,k) * 1.0d0 / u(1,i,j,k) + u(5,i,j,k - 1) *
|
||||
&1.0d0 / u(1,i,j,k-1)
|
||||
&) - tz2 * ((c1 * u(5,i,j,k + 1) - c2 * s6) * wp1 -
|
||||
&(c1 * u(5,i,j,k - 1) - c2 * s7) * wm1)
|
||||
if (k .eq. 1) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
|
||||
&0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2))
|
||||
enddo
|
||||
else if (k .eq. 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j,k
|
||||
&- 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2
|
||||
&))
|
||||
enddo
|
||||
else if (k .ge. 3 .and. k .le. problem_size - 4) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
|
||||
&* u(m,i,j,k - 1) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m
|
||||
&,i,j,k + 2))
|
||||
enddo
|
||||
else if (k .eq. problem_size - 3) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
|
||||
&* u(m,i,j,k - 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1))
|
||||
enddo
|
||||
else if (k .eq. problem_size - 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.d0 *
|
||||
& u(m,i,j,k - 1) + 5.d0 * u(m,i,j,k))
|
||||
enddo
|
||||
endif
|
||||
rhs(1,i,j,k) = rhs_(1) * dt
|
||||
rhs(2,i,j,k) = rhs_(2) * dt
|
||||
rhs(3,i,j,k) = rhs_(3) * dt
|
||||
rhs(4,i,j,k) = rhs_(4) * dt
|
||||
rhs(5,i,j,k) = rhs_(5) * dt
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,247 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine compute_rhs ()
|
||||
|
||||
include 'header3d.h'
|
||||
integer i,j,k,m,z
|
||||
double precision rho_inv,uijk,up1,um1,vijk,vp1,vm1,wijk,wp1,wm1,r
|
||||
&hs_(5),s1,s2,s3,s4,s5,s6,s7,qs1,qs2,qs3,qs4,qs5,qs6,qs7
|
||||
!DVM$ region
|
||||
!DVM$ PARALLEL (k,j,i) ON us(i,j,k),PRIVATE(m),cuda_block(128)
|
||||
do k = 0,problem_size - 1
|
||||
do j = 0,problem_size - 1
|
||||
do i = 0,problem_size - 1
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = 0
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! compute xi-direction fluxes
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), PRIVATE(uijk,up1,um1,m,
|
||||
!DVM$&vijk,vp1,vm1,wijk,wp1,wm1,rhs_,s1,s2,s3,s4,s5,s6,s7,
|
||||
!DVM$&qs1,qs2,qs3,qs4,qs5,qs6,qs7),cuda_block(32)
|
||||
do k = 1,problem_size - 2
|
||||
do j = 1,problem_size - 2
|
||||
do i = 1,problem_size - 2
|
||||
do m = 1, 5
|
||||
rhs_(m) = forcing(m,i,j,k)
|
||||
end do
|
||||
|
||||
uijk = u(2,i,j,k) / u(1,i,j,k)
|
||||
up1 = u(2,i + 1,j,k) / u(1,i + 1,j,k)
|
||||
um1 = u(2,i - 1,j,k) / u(1,i - 1,j,k)
|
||||
|
||||
vijk = u(3,i,j,k) / u(1,i,j,k)
|
||||
vp1 = u(3,i,j + 1,k) / u(1,i,j + 1,k)
|
||||
vm1 = u(3,i,j - 1,k) / u(1,i,j - 1,k)
|
||||
|
||||
wijk = u(4,i,j,k) / u(1,i,j,k)
|
||||
wp1 = u(4,i,j,k + 1) / u(1,i,j,k + 1)
|
||||
wm1 = u(4,i,j,k - 1) / u(1,i,j,k - 1)
|
||||
|
||||
s1 = 0.5d0 * (u(2,i,j,k) * u(2,i,j,k) + u(3,i,
|
||||
&j,k) * u(3,i,j,k) + u(4,i,j,k) * u(4,i,j,k)) / u(1,i,j,k)
|
||||
s2 = 0.5d0 * (u(2,i+1,j,k) * u(2,i+1,j,k) + u(3,i
|
||||
&+1,j,k) * u(3,i+1,j,k) + u(4,i+1,j,k) * u(4,i+1,j,k)) /
|
||||
&u(1,i+1,j,k)
|
||||
s3 = 0.5d0 * (u(2,i-1,j,k) * u(2,i-1,j,k) + u(3,i
|
||||
&-1,j,k) * u(3,i-1,j,k) + u(4,i-1,j,k) * u(4,i-1,j,k)) /
|
||||
&u(1,i-1,j,k)
|
||||
s4 = 0.5d0 * (u(2,i,j+1,k) * u(2,i,j+1,k) + u(3,i,
|
||||
&j+1,k) * u(3,i,j+1,k) + u(4,i,j+1,k) * u(4,i,j+1,k)) /
|
||||
&u(1,i,j+1,k)
|
||||
s5 = 0.5d0 * (u(2,i,j-1,k) * u(2,i,j-1,k) + u(3,i,
|
||||
&j-1,k) * u(3,i,j-1,k) + u(4,i,j-1,k) * u(4,i,j-1,k)) /
|
||||
&u(1,i,j-1,k)
|
||||
s6 = 0.5d0 * (u(2,i,j,k+1) * u(2,i,j,k+1) + u(3,i,
|
||||
&j,k+1) * u(3,i,j,k+1) + u(4,i,j,k+1) * u(4,i,j,k+1)) /
|
||||
&u(1,i,j,k+1)
|
||||
s7 = 0.5d0 * (u(2,i,j,k-1) * u(2,i,j,k-1) + u(3,i,
|
||||
&j,k-1) * u(3,i,j,k-1) + u(4,i,j,k-1) * u(4,i,j,k-1)) /
|
||||
&u(1,i,j,k-1)
|
||||
|
||||
qs1 = s1 / u(1,i,j,k)
|
||||
qs2 = s2 / u(1,i+1,j,k)
|
||||
qs3 = s3 / u(1,i-1,j,k)
|
||||
qs4 = s4 / u(1,i,j+1,k)
|
||||
qs5 = s5 / u(1,i,j-1,k)
|
||||
qs6 = s6 / u(1,i,j,k+1)
|
||||
qs7 = s7 / u(1,i,j,k-1)
|
||||
|
||||
! rhs_(1) = forcing(1,i,j,k)
|
||||
! rhs_(2) = forcing(2,i,j,k)
|
||||
! rhs_(3) = forcing(3,i,j,k)
|
||||
! rhs_(4) = forcing(4,i,j,k)
|
||||
! rhs_(5) = forcing(5,i,j,k)
|
||||
|
||||
rhs_(1) = rhs_(1) + dx1tx1 * (u(1,i + 1,j,k) - 2.0d0 * u(
|
||||
&1,i,j,k) + u(1,i - 1,j,k)) - tx2 * (u(2,i + 1,j,k) - u(2,i - 1,j,k
|
||||
&))
|
||||
rhs_(2) = rhs_(2) + dx2tx1 * (u(2,i + 1,j,k) - 2.0d0 * u(
|
||||
&2,i,j,k) + u(2,i - 1,j,k)) + xxcon2 * con43 * (up1 - 2.0d0 * uijk
|
||||
&+ um1) - tx2 * (u(2,i + 1,j,k) * up1 - u(2,i - 1,j,k) * um1 + (u(5
|
||||
&,i + 1,j,k) - s2 - u(5,i - 1,j,k) + s3) * c2)
|
||||
rhs_(3) = rhs_(3) + dx3tx1 * (u(3,i + 1,j,k) - 2.0d0 * u(
|
||||
&3,i,j,k) + u(3,i - 1,j,k)) + xxcon2 * (u(3,i + 1,j,k) /
|
||||
&u(1,i + 1,j,k) - 2.0d0 * vijk + u(3,i - 1,j,k)/u(1,i - 1,j,k))
|
||||
&- tx2 * (u(3,i + 1,j,k) * up1 - u(3,i - 1,j,k) * um1)
|
||||
rhs_(4) = rhs_(4) + dx4tx1 * (u(4,i + 1,j,k) - 2.0d0 * u(
|
||||
&4,i,j,k) + u(4,i - 1,j,k)) + xxcon2 * (u(4,i + 1,j,k) /
|
||||
&u(1,i + 1,j,k) - 2.0d0 * wijk + u(4,i - 1,j,k) / u(1,i - 1,j,k))
|
||||
&- tx2 * (u(4,i + 1,j,k) * up1 - u(4,i - 1,j,k) * um1)
|
||||
rhs_(5) = rhs_(5) + dx5tx1 * (u(5,i + 1,j,k) - 2.0d0 * u(
|
||||
&5,i,j,k) + u(5,i - 1,j,k)) + xxcon3 * (qs2 - 2.0d0 * qs1 +
|
||||
&qs3) + xxcon4 * (up1 * up1 - 2.0d0 * uijk * uij
|
||||
&k + um1 * um1) + xxcon5 * (u(5,i + 1,j,k) * 1.0d0 / u(1,i + 1,j,k)
|
||||
& - 2.0d0 * u(5,i,j,k) * 1.0d0 / u(1,i,j,k) + u(5,i - 1,j,k) *
|
||||
&1.0d0 / u(1,i - 1,j,k)) - tx2 * ((c1 * u(5,i + 1,j,k) -
|
||||
&c2 * s2) * up1 -
|
||||
&(c1 * u(5,i - 1,j,k) - c2 * s3) * um1)
|
||||
if (i .eq. 1) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
|
||||
&0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k))
|
||||
enddo
|
||||
else if (i .eq. 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i - 1,
|
||||
&j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k
|
||||
&))
|
||||
enddo
|
||||
else if (i .ge. 3 .and. i .le. problem_size - 4) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
|
||||
&* u(m,i - 1,j,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m
|
||||
&,i + 2,j,k))
|
||||
enddo
|
||||
else if (i .eq. problem_size - 3) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
|
||||
&* u(m,i - 1,j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k))
|
||||
enddo
|
||||
else if (i .eq. problem_size - 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.d0 *
|
||||
& u(m,i - 1,j,k) + 5.d0 * u(m,i,j,k))
|
||||
enddo
|
||||
endif
|
||||
|
||||
rhs_(1) = rhs_(1) + dy1ty1 * (u(1,i,j + 1,k) - 2.0d0 * u(
|
||||
&1,i,j,k) + u(1,i,j - 1,k)) - ty2 * (u(3,i,j + 1,k) - u(3,i,j - 1,k
|
||||
&))
|
||||
rhs_(2) = rhs_(2) + dy2ty1 * (u(2,i,j + 1,k) - 2.0d0 * u(
|
||||
&2,i,j,k) + u(2,i,j - 1,k)) + yycon2 * (u(2,i,j + 1,k) /
|
||||
&u(1,i,j + 1,k) - 2.0d0 * uijk + u(2,i,j - 1,k)/u(1,i,j - 1,k))
|
||||
&- ty2 * (u(2,i,j + 1,k) * vp1 - u(2,i,j -1,k) * vm1)
|
||||
rhs_(3) = rhs_(3) + dy3ty1 * (u(3,i,j + 1,k) - 2.0d0 * u(
|
||||
&3,i,j,k) + u(3,i,j - 1,k)) + yycon2 * con43 * (vp1 - 2.0d0 * vijk
|
||||
&+ vm1) - ty2 * (u(3,i,j + 1,k) * vp1 - u(3,i,j - 1,k) * vm1 + (u(5
|
||||
&,i,j + 1,k) - s4 - u(5,i,j - 1,k) + s5) * c2)
|
||||
rhs_(4) = rhs_(4) + dy4ty1 * (u(4,i,j + 1,k) - 2.0d0 * u(
|
||||
&4,i,j,k) + u(4,i,j - 1,k)) + yycon2 * (u(4,i,j + 1,k) /
|
||||
&u(1,i,j + 1,k) - 2.0d0 * wijk + u(4,i,j - 1,k) / u(1,i,j - 1,k))
|
||||
&- ty2 * (u(4,i,j + 1,k) * vp1 - u(4,i,j - 1,k) * vm1)
|
||||
rhs_(5) = rhs_(5) + dy5ty1 * (u(5,i,j + 1,k) - 2.0d0 * u(
|
||||
&5,i,j,k) + u(5,i,j - 1,k)) + yycon3 * (qs4 - 2.0d0 * qs1
|
||||
& + qs5) + yycon4 * (vp1 * vp1 - 2.0d0 * vijk * vij
|
||||
&k + vm1 * vm1) + yycon5 * (u(5,i,j + 1,k) * 1.0d0 / u(1,i,j + 1,k)
|
||||
&- 2.0d0 * u(5,i,j,k) * 1.0d0 / u(1,i,j,k) + u(5,i,j - 1,k) *
|
||||
&1.0d0 / u(1,i,j - 1,k)
|
||||
&) - ty2 * ((c1 * u(5,i,j + 1,k) - c2 * s4) * vp1 -
|
||||
&(c1 * u(5,i,j - 1,k) - c2 * s5) * vm1)
|
||||
if (j .eq. 1) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
|
||||
&0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k))
|
||||
enddo
|
||||
else if (j .eq. 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j -
|
||||
&1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k
|
||||
&))
|
||||
enddo
|
||||
else if (j .ge. 3 .and. j .le. problem_size - 4) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
|
||||
&* u(m,i,j - 1,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m
|
||||
&,i,j + 2,k))
|
||||
enddo
|
||||
else if (j .eq. problem_size - 3) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
|
||||
&* u(m,i,j - 1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k))
|
||||
enddo
|
||||
else if (j .eq. problem_size - 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.d0 *
|
||||
& u(m,i,j - 1,k) + 5.d0 * u(m,i,j,k))
|
||||
enddo
|
||||
endif
|
||||
|
||||
rhs_(1) = rhs_(1) + dz1tz1 * (u(1,i,j,k + 1) - 2.0d0 * u(
|
||||
&1,i,j,k) + u(1,i,j,k - 1)) - tz2 * (u(4,i,j,k + 1) - u(4,i,j,k - 1
|
||||
&))
|
||||
rhs_(2) = rhs_(2) + dz2tz1 * (u(2,i,j,k + 1) - 2.0d0 * u(
|
||||
&2,i,j,k) + u(2,i,j,k - 1)) + zzcon2 * (u(2,i,j,k + 1) /
|
||||
&u(1,i,j,k + 1) - 2.0d0 * uijk + u(2,i,j,k - 1) / u(1,i,j,k - 1))
|
||||
&- tz2 * (u(2,i,j,k + 1) * wp1 - u(2,i,j,k - 1) * wm1)
|
||||
rhs_(3) = rhs_(3) + dz3tz1 * (u(3,i,j,k + 1) - 2.0d0 * u(
|
||||
&3,i,j,k) + u(3,i,j,k - 1)) + zzcon2 * (u(3,i,j,k + 1) /
|
||||
&u(1,i,j,k + 1) - 2.0d0 * vijk + u(3,i,j,k - 1) / u(1,i,j,k - 1))
|
||||
&- tz2 * (u(3,i,j,k + 1) * wp1 - u(3,i,j,k - 1) * wm1)
|
||||
rhs_(4) = rhs_(4) + dz4tz1 * (u(4,i,j,k + 1) - 2.0d0 * u(
|
||||
&4,i,j,k) + u(4,i,j,k - 1)) + zzcon2 * con43 * (wp1 - 2.0d0 * wijk
|
||||
&+ wm1) - tz2 * (u(4,i,j,k + 1) * wp1 - u(4,i,j,k - 1) * wm1 + (u(5
|
||||
&,i,j,k + 1) - s6 - u(5,i,j,k - 1) + s7) * c2)
|
||||
rhs_(5) = rhs_(5) + dz5tz1 * (u(5,i,j,k + 1) - 2.0d0 * u(
|
||||
&5,i,j,k) + u(5,i,j,k - 1)) + zzcon3 * (qs6 - 2.0d0 * qs1 +
|
||||
&qs7) + zzcon4 * (wp1 * wp1 - 2.0d0 * wijk * wij
|
||||
&k + wm1 * wm1) + zzcon5 * (u(5,i,j,k + 1) * 1.0d0 / u(1,i,j,k+1)
|
||||
&- 2.0d0 * u(5,i,j,k) * 1.0d0 / u(1,i,j,k) + u(5,i,j,k - 1) *
|
||||
&1.0d0 / u(1,i,j,k-1)
|
||||
&) - tz2 * ((c1 * u(5,i,j,k + 1) - c2 * s6) * wp1 -
|
||||
&(c1 * u(5,i,j,k - 1) - c2 * s7) * wm1)
|
||||
if (k .eq. 1) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
|
||||
&0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2))
|
||||
enddo
|
||||
else if (k .eq. 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j,k
|
||||
&- 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2
|
||||
&))
|
||||
enddo
|
||||
else if (k .ge. 3 .and. k .le. problem_size - 4) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
|
||||
&* u(m,i,j,k - 1) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m
|
||||
&,i,j,k + 2))
|
||||
enddo
|
||||
else if (k .eq. problem_size - 3) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
|
||||
&* u(m,i,j,k - 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1))
|
||||
enddo
|
||||
else if (k .eq. problem_size - 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.d0 *
|
||||
& u(m,i,j,k - 1) + 5.d0 * u(m,i,j,k))
|
||||
enddo
|
||||
endif
|
||||
rhs(1,i,j,k) = rhs_(1) * dt
|
||||
rhs(2,i,j,k) = rhs_(2) * dt
|
||||
rhs(3,i,j,k) = rhs_(3) * dt
|
||||
rhs(4,i,j,k) = rhs_(4) * dt
|
||||
rhs(5,i,j,k) = rhs_(5) * dt
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,219 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine compute_rhs ()
|
||||
|
||||
include 'header3d.h'
|
||||
integer i,j,k,m
|
||||
double precision rho_inv,uijk,up1,um1,vijk,vp1,vm1,wijk,wp1,wm1,r
|
||||
&hs_(5)
|
||||
|
||||
!DVM$ region out(rho_i, us, vs, ws, qs, square)
|
||||
!DVM$ PARALLEL (k,j,i) ON us(i,j,k), SHADOW_COMPUTE,
|
||||
!DVM$& PRIVATE(rho_inv,m),cuda_block(128)
|
||||
!DVM$& ,SHADOW_RENEW(u(0:0,2:2,2:2,2:2))
|
||||
do k = 0,problem_size - 1
|
||||
do j = 0,problem_size - 1
|
||||
do i = 0,problem_size - 1
|
||||
rho_inv = 1.0d0 / u(1,i,j,k)
|
||||
rho_i(i,j,k) = rho_inv
|
||||
us(i,j,k) = u(2,i,j,k) * rho_inv
|
||||
vs(i,j,k) = u(3,i,j,k) * rho_inv
|
||||
ws(i,j,k) = u(4,i,j,k) * rho_inv
|
||||
square(i,j,k) = 0.5d0 * (u(2,i,j,k) * u(2,i,j,k) + u(3,i,
|
||||
&j,k) * u(3,i,j,k) + u(4,i,j,k) * u(4,i,j,k)) * rho_inv
|
||||
qs(i,j,k) = square(i,j,k) * rho_inv
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = forcing(m,i,j,k)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! compute xi-direction fluxes
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), PRIVATE(uijk,up1,um1,m,
|
||||
!DVM$&vijk,vp1,vm1,wijk,wp1,wm1,rhs_),cuda_block(32)
|
||||
do k = 1,problem_size - 2
|
||||
do j = 1,problem_size - 2
|
||||
do i = 1,problem_size - 2
|
||||
uijk = us(i,j,k)
|
||||
up1 = us(i + 1,j,k)
|
||||
um1 = us(i - 1,j,k)
|
||||
rhs_(1) = forcing(1,i,j,k)
|
||||
rhs_(2) = forcing(2,i,j,k)
|
||||
rhs_(3) = forcing(3,i,j,k)
|
||||
rhs_(4) = forcing(4,i,j,k)
|
||||
rhs_(5) = forcing(5,i,j,k)
|
||||
|
||||
rhs_(1) = rhs_(1) + dx1tx1 * (u(1,i + 1,j,k) - 2.0d0 * u(
|
||||
&1,i,j,k) + u(1,i - 1,j,k)) - tx2 * (u(2,i + 1,j,k) - u(2,i - 1,j,k
|
||||
&))
|
||||
rhs_(2) = rhs_(2) + dx2tx1 * (u(2,i + 1,j,k) - 2.0d0 * u(
|
||||
&2,i,j,k) + u(2,i - 1,j,k)) + xxcon2 * con43 * (up1 - 2.0d0 * uijk
|
||||
&+ um1) - tx2 * (u(2,i + 1,j,k) * up1 - u(2,i - 1,j,k) * um1 + (u(5
|
||||
&,i + 1,j,k) - square(i + 1,j,k) - u(5,i - 1,j,k) + square(i - 1,j,
|
||||
&k)) * c2)
|
||||
rhs_(3) = rhs_(3) + dx3tx1 * (u(3,i + 1,j,k) - 2.0d0 * u(
|
||||
&3,i,j,k) + u(3,i - 1,j,k)) + xxcon2 * (vs(i + 1,j,k) - 2.0d0 * vs(
|
||||
&i,j,k) + vs(i - 1,j,k)) - tx2 * (u(3,i + 1,j,k) * up1 - u(3,i - 1,
|
||||
&j,k) * um1)
|
||||
rhs_(4) = rhs_(4) + dx4tx1 * (u(4,i + 1,j,k) - 2.0d0 * u(
|
||||
&4,i,j,k) + u(4,i - 1,j,k)) + xxcon2 * (ws(i + 1,j,k) - 2.0d0 * ws(
|
||||
&i,j,k) + ws(i - 1,j,k)) - tx2 * (u(4,i + 1,j,k) * up1 - u(4,i - 1,
|
||||
&j,k) * um1)
|
||||
rhs_(5) = rhs_(5) + dx5tx1 * (u(5,i + 1,j,k) - 2.0d0 * u(
|
||||
&5,i,j,k) + u(5,i - 1,j,k)) + xxcon3 * (qs(i + 1,j,k) - 2.0d0 * qs(
|
||||
&i,j,k) + qs(i - 1,j,k)) + xxcon4 * (up1 * up1 - 2.0d0 * uijk * uij
|
||||
&k + um1 * um1) + xxcon5 * (u(5,i + 1,j,k) * rho_i(i + 1,j,k) - 2.0
|
||||
&d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i - 1,j,k) * rho_i(i - 1,j,k)
|
||||
&) - tx2 * ((c1 * u(5,i + 1,j,k) - c2 * square(i + 1,j,k)) * up1 -
|
||||
&(c1 * u(5,i - 1,j,k) - c2 * square(i - 1,j,k)) * um1)
|
||||
if (i .eq. 1) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
|
||||
&0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k))
|
||||
enddo
|
||||
else if (i .eq. 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i - 1,
|
||||
&j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k
|
||||
&))
|
||||
enddo
|
||||
else if (i .ge. 3 .and. i .le. problem_size - 4) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
|
||||
&* u(m,i - 1,j,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m
|
||||
&,i + 2,j,k))
|
||||
enddo
|
||||
else if (i .eq. problem_size - 3) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
|
||||
&* u(m,i - 1,j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k))
|
||||
enddo
|
||||
else if (i .eq. problem_size - 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.d0 *
|
||||
& u(m,i - 1,j,k) + 5.d0 * u(m,i,j,k))
|
||||
enddo
|
||||
endif
|
||||
vijk = vs(i,j,k)
|
||||
vp1 = vs(i,j + 1,k)
|
||||
vm1 = vs(i,j - 1,k)
|
||||
rhs_(1) = rhs_(1) + dy1ty1 * (u(1,i,j + 1,k) - 2.0d0 * u(
|
||||
&1,i,j,k) + u(1,i,j - 1,k)) - ty2 * (u(3,i,j + 1,k) - u(3,i,j - 1,k
|
||||
&))
|
||||
rhs_(2) = rhs_(2) + dy2ty1 * (u(2,i,j + 1,k) - 2.0d0 * u(
|
||||
&2,i,j,k) + u(2,i,j - 1,k)) + yycon2 * (us(i,j + 1,k) - 2.0d0 * us(
|
||||
&i,j,k) + us(i,j - 1,k)) - ty2 * (u(2,i,j + 1,k) * vp1 - u(2,i,j -
|
||||
&1,k) * vm1)
|
||||
rhs_(3) = rhs_(3) + dy3ty1 * (u(3,i,j + 1,k) - 2.0d0 * u(
|
||||
&3,i,j,k) + u(3,i,j - 1,k)) + yycon2 * con43 * (vp1 - 2.0d0 * vijk
|
||||
&+ vm1) - ty2 * (u(3,i,j + 1,k) * vp1 - u(3,i,j - 1,k) * vm1 + (u(5
|
||||
&,i,j + 1,k) - square(i,j + 1,k) - u(5,i,j - 1,k) + square(i,j - 1,
|
||||
&k)) * c2)
|
||||
rhs_(4) = rhs_(4) + dy4ty1 * (u(4,i,j + 1,k) - 2.0d0 * u(
|
||||
&4,i,j,k) + u(4,i,j - 1,k)) + yycon2 * (ws(i,j + 1,k) - 2.0d0 * ws(
|
||||
&i,j,k) + ws(i,j - 1,k)) - ty2 * (u(4,i,j + 1,k) * vp1 - u(4,i,j -
|
||||
&1,k) * vm1)
|
||||
rhs_(5) = rhs_(5) + dy5ty1 * (u(5,i,j + 1,k) - 2.0d0 * u(
|
||||
&5,i,j,k) + u(5,i,j - 1,k)) + yycon3 * (qs(i,j + 1,k) - 2.0d0 * qs(
|
||||
&i,j,k) + qs(i,j - 1,k)) + yycon4 * (vp1 * vp1 - 2.0d0 * vijk * vij
|
||||
&k + vm1 * vm1) + yycon5 * (u(5,i,j + 1,k) * rho_i(i,j + 1,k) - 2.0
|
||||
&d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i,j - 1,k) * rho_i(i,j - 1,k)
|
||||
&) - ty2 * ((c1 * u(5,i,j + 1,k) - c2 * square(i,j + 1,k)) * vp1 -
|
||||
&(c1 * u(5,i,j - 1,k) - c2 * square(i,j - 1,k)) * vm1)
|
||||
if (j .eq. 1) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
|
||||
&0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k))
|
||||
enddo
|
||||
else if (j .eq. 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j -
|
||||
&1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k
|
||||
&))
|
||||
enddo
|
||||
else if (j .ge. 3 .and. j .le. problem_size - 4) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
|
||||
&* u(m,i,j - 1,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m
|
||||
&,i,j + 2,k))
|
||||
enddo
|
||||
else if (j .eq. problem_size - 3) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
|
||||
&* u(m,i,j - 1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k))
|
||||
enddo
|
||||
else if (j .eq. problem_size - 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.d0 *
|
||||
& u(m,i,j - 1,k) + 5.d0 * u(m,i,j,k))
|
||||
enddo
|
||||
endif
|
||||
wijk = ws(i,j,k)
|
||||
wp1 = ws(i,j,k + 1)
|
||||
wm1 = ws(i,j,k - 1)
|
||||
rhs_(1) = rhs_(1) + dz1tz1 * (u(1,i,j,k + 1) - 2.0d0 * u(
|
||||
&1,i,j,k) + u(1,i,j,k - 1)) - tz2 * (u(4,i,j,k + 1) - u(4,i,j,k - 1
|
||||
&))
|
||||
rhs_(2) = rhs_(2) + dz2tz1 * (u(2,i,j,k + 1) - 2.0d0 * u(
|
||||
&2,i,j,k) + u(2,i,j,k - 1)) + zzcon2 * (us(i,j,k + 1) - 2.0d0 * us(
|
||||
&i,j,k) + us(i,j,k - 1)) - tz2 * (u(2,i,j,k + 1) * wp1 - u(2,i,j,k
|
||||
&- 1) * wm1)
|
||||
rhs_(3) = rhs_(3) + dz3tz1 * (u(3,i,j,k + 1) - 2.0d0 * u(
|
||||
&3,i,j,k) + u(3,i,j,k - 1)) + zzcon2 * (vs(i,j,k + 1) - 2.0d0 * vs(
|
||||
&i,j,k) + vs(i,j,k - 1)) - tz2 * (u(3,i,j,k + 1) * wp1 - u(3,i,j,k
|
||||
&- 1) * wm1)
|
||||
rhs_(4) = rhs_(4) + dz4tz1 * (u(4,i,j,k + 1) - 2.0d0 * u(
|
||||
&4,i,j,k) + u(4,i,j,k - 1)) + zzcon2 * con43 * (wp1 - 2.0d0 * wijk
|
||||
&+ wm1) - tz2 * (u(4,i,j,k + 1) * wp1 - u(4,i,j,k - 1) * wm1 + (u(5
|
||||
&,i,j,k + 1) - square(i,j,k + 1) - u(5,i,j,k - 1) + square(i,j,k -
|
||||
&1)) * c2)
|
||||
rhs_(5) = rhs_(5) + dz5tz1 * (u(5,i,j,k + 1) - 2.0d0 * u(
|
||||
&5,i,j,k) + u(5,i,j,k - 1)) + zzcon3 * (qs(i,j,k + 1) - 2.0d0 * qs(
|
||||
&i,j,k) + qs(i,j,k - 1)) + zzcon4 * (wp1 * wp1 - 2.0d0 * wijk * wij
|
||||
&k + wm1 * wm1) + zzcon5 * (u(5,i,j,k + 1) * rho_i(i,j,k + 1) - 2.0
|
||||
&d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i,j,k - 1) * rho_i(i,j,k - 1)
|
||||
&) - tz2 * ((c1 * u(5,i,j,k + 1) - c2 * square(i,j,k + 1)) * wp1 -
|
||||
&(c1 * u(5,i,j,k - 1) - c2 * square(i,j,k - 1)) * wm1)
|
||||
if (k .eq. 1) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
|
||||
&0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2))
|
||||
enddo
|
||||
else if (k .eq. 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j,k
|
||||
&- 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2
|
||||
&))
|
||||
enddo
|
||||
else if (k .ge. 3 .and. k .le. problem_size - 4) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
|
||||
&* u(m,i,j,k - 1) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m
|
||||
&,i,j,k + 2))
|
||||
enddo
|
||||
else if (k .eq. problem_size - 3) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
|
||||
&* u(m,i,j,k - 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1))
|
||||
enddo
|
||||
else if (k .eq. problem_size - 2) then
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.d0 *
|
||||
& u(m,i,j,k - 1) + 5.d0 * u(m,i,j,k))
|
||||
enddo
|
||||
endif
|
||||
rhs(1,i,j,k) = rhs_(1) * dt
|
||||
rhs(2,i,j,k) = rhs_(2) * dt
|
||||
rhs(3,i,j,k) = rhs_(3) * dt
|
||||
rhs(4,i,j,k) = rhs_(4) * dt
|
||||
rhs(5,i,j,k) = rhs_(5) * dt
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,307 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine exact_rhs
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
|
||||
include 'header3d.h'
|
||||
|
||||
double precision dtemp(5), xi, eta, zeta, dtpp
|
||||
integer m, i, j, k, ip1, im1, jp1, p, p1,
|
||||
> jm1, km1, kp1,z
|
||||
double precision ue_(-2:2, 5), buf_(-2:2, 5),cuf_(-2:2),q_(-2:2)
|
||||
|
||||
|
||||
!DVM$ region
|
||||
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(m)
|
||||
do k= 0, problem_size-1
|
||||
do j = 0, problem_size-1
|
||||
do i = 0, problem_size-1
|
||||
do m = 1, 5
|
||||
forcing(m,i,j,k) = 0.0d0
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c xi-direction flux differences
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(zeta,eta,xi,m,dtemp
|
||||
!DVM$& ,buf_,cuf_,q_,dtpp,z,ue_)
|
||||
do k = 1, problem_size-2
|
||||
do j = 1, problem_size-2
|
||||
do i = 1, problem_size-2
|
||||
zeta = dble(k) * dnzm1
|
||||
eta = dble(j) * dnym1
|
||||
do z = -2, 2
|
||||
xi = dble(i + z) * dnxm1
|
||||
do m = 1, 5
|
||||
dtemp(m) = ce(m,1) +
|
||||
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
|
||||
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
|
||||
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
do m = 1, 5
|
||||
ue_(z,m) = dtemp(m)
|
||||
end do
|
||||
dtpp = 1.0d0 / dtemp(1)
|
||||
do m = 2, 5
|
||||
buf_(z, m) = dtpp * dtemp(m)
|
||||
end do
|
||||
|
||||
cuf_(z) = buf_(z,2) * buf_(z,2)
|
||||
buf_(z,1) = cuf_(z) + buf_(z,3) * buf_(z,3) +
|
||||
> buf_(z,4) * buf_(z,4)
|
||||
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)*
|
||||
> ue_(z,3) + buf_(z,4)*ue_(z,4))
|
||||
enddo
|
||||
|
||||
forcing(1,i,j,k) = forcing(1,i,j,k) -
|
||||
> tx2*( ue_(1,2)-ue_(-1,2) )+
|
||||
> dx1tx1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
|
||||
|
||||
forcing(2,i,j,k) = forcing(2,i,j,k) - tx2 * (
|
||||
> (ue_(1,2)*buf_(1,2)+c2*(ue_(1,5)-q_(1)))-
|
||||
> (ue_(-1,2)*buf_(-1,2)+c2*(ue_(-1,5)-q_(-1))))+
|
||||
> xxcon1*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
|
||||
> dx2tx1*( ue_(1,2)-2.0d0* ue_(0,2)+ue_(-1,2))
|
||||
|
||||
forcing(3,i,j,k) = forcing(3,i,j,k) - tx2 * (
|
||||
> ue_(1,3)*buf_(1,2)-ue_(-1,3)*buf_(-1,2))+
|
||||
> xxcon2*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
|
||||
> dx3tx1*( ue_(1,3)-2.0d0*ue_(0,3) +ue_(-1,3))
|
||||
|
||||
forcing(4,i,j,k) = forcing(4,i,j,k) - tx2*(
|
||||
> ue_(1,4)*buf_(1,2)-ue_(-1,4)*buf_(-1,2))+
|
||||
> xxcon2*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
|
||||
> dx4tx1*( ue_(1,4)-2.0d0* ue_(0,4)+ ue_(-1,4))
|
||||
|
||||
forcing(5,i,j,k) = forcing(5,i,j,k) - tx2*(
|
||||
> buf_(1,2)*(c1*ue_(1,5)-c2*q_(1))-
|
||||
> buf_(-1,2)*(c1*ue_(-1,5)-c2*q_(-1)))+
|
||||
> 0.5d0*xxcon3*(buf_(1,1)-2.0d0*buf_(0,1)+
|
||||
> buf_(-1,1))+
|
||||
> xxcon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
|
||||
> xxcon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
|
||||
> dx5tx1*( ue_(1,5)-2.0d0* ue_(0,5)+ ue_(-1,5))
|
||||
do m = 1, 5
|
||||
if(i .eq. 1) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
|
||||
else if(i .eq. 2) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
|
||||
> 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
else if(i .eq. problem_size-3) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
|
||||
else if(i .eq. problem_size-2) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
|
||||
else
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp*
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
endif
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c eta-direction flux differences
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(zeta,eta,xi,m,dtemp
|
||||
!DVM$& ,buf_,cuf_,q_,dtpp,z,ue_)
|
||||
do k = 1, problem_size- 2
|
||||
do j = 1, problem_size-2
|
||||
do i = 1, problem_size- 2
|
||||
zeta = dble(k) * dnzm1
|
||||
xi = dble(i) * dnxm1
|
||||
do z = -2, 2
|
||||
eta = dble(j + z) * dnym1
|
||||
do m = 1, 5
|
||||
dtemp(m) = ce(m,1) +
|
||||
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
|
||||
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
|
||||
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
do m = 1, 5
|
||||
ue_(z,m) = dtemp(m)
|
||||
end do
|
||||
dtpp = 1.0d0 / dtemp(1)
|
||||
do m = 2, 5
|
||||
buf_(z, m) = dtpp * dtemp(m)
|
||||
end do
|
||||
|
||||
cuf_(z) = buf_(z,3) * buf_(z,3)
|
||||
buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) +
|
||||
> buf_(z,4) * buf_(z,4)
|
||||
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)
|
||||
> *ue_(z,3) + buf_(z,4) * ue_(z,4))
|
||||
enddo
|
||||
|
||||
forcing(1,i,j,k) = forcing(1,i,j,k) -
|
||||
> ty2*( ue_(1,3)-ue_(-1,3) )+
|
||||
> dy1ty1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
|
||||
|
||||
forcing(2,i,j,k) = forcing(2,i,j,k) - ty2*(
|
||||
> ue_(1,2)*buf_(1,3)-ue_(-1,2)*buf_(-1,3))+
|
||||
> yycon2*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
|
||||
> dy2ty1*( ue_(1,2)-2.0* ue_(0,2)+ ue_(-1,2))
|
||||
|
||||
forcing(3,i,j,k) = forcing(3,i,j,k) - ty2*(
|
||||
> (ue_(1,3)*buf_(1,3)+c2*(ue_(1,5)-q_(1)))-
|
||||
> (ue_(-1,3)*buf_(-1,3)+c2*(ue_(-1,5)-q_(-1))))+
|
||||
> yycon1*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
|
||||
> dy3ty1*( ue_(1,3)-2.0d0*ue_(0,3) +ue_(-1,3))
|
||||
|
||||
forcing(4,i,j,k) = forcing(4,i,j,k) - ty2*(
|
||||
> ue_(1,4)*buf_(1,3)-ue_(-1,4)*buf_(-1,3))+
|
||||
> yycon2*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
|
||||
> dy4ty1*( ue_(1,4)-2.0d0*ue_(0,4)+ ue_(-1,4))
|
||||
|
||||
forcing(5,i,j,k) = forcing(5,i,j,k) - ty2*(
|
||||
> buf_(1,3)*(c1*ue_(1,5)-c2*q_(1))-
|
||||
> buf_(-1,3)*(c1*ue_(-1,5)-c2*q_(-1)))+
|
||||
> 0.5d0*yycon3*(buf_(1,1)-2.0d0*buf_(0,1)+
|
||||
> buf_(-1,1))+
|
||||
> yycon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
|
||||
> yycon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
|
||||
> dy5ty1*(ue_(1,5)-2.0d0*ue_(0,5)+ue_(-1,5))
|
||||
do m = 1, 5
|
||||
if(j .eq. 1) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
|
||||
else if(j .eq. 2) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
|
||||
> 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
else if(j .eq. problem_size-3) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
|
||||
else if(j .eq. problem_size-2) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
|
||||
else
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp*
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
endif
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c zeta-direction flux differences
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(zeta,eta,xi,m
|
||||
!DVM$& ,buf_,cuf_,q_,ue_,dtpp,dtemp,z)
|
||||
do k = 1, problem_size-2
|
||||
do j = 1, problem_size-2
|
||||
do i = 1, problem_size-2
|
||||
xi = dble(i) * dnxm1
|
||||
eta = dble(j) * dnym1
|
||||
do z = -2, 2
|
||||
zeta = dble(k + z) * dnzm1
|
||||
do m = 1, 5
|
||||
dtemp(m) = ce(m,1) +
|
||||
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
|
||||
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
|
||||
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
do m = 1, 5
|
||||
ue_(z,m) = dtemp(m)
|
||||
end do
|
||||
dtpp = 1.0d0 / dtemp(1)
|
||||
do m = 2, 5
|
||||
buf_(z, m) = dtpp * dtemp(m)
|
||||
end do
|
||||
|
||||
cuf_(z) = buf_(z,4) * buf_(z,4)
|
||||
buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) +
|
||||
> buf_(z,3) * buf_(z,3)
|
||||
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)*
|
||||
> ue_(z,3) + buf_(z,4)*ue_(z,4))
|
||||
enddo
|
||||
|
||||
forcing(1,i,j,k) = forcing(1,i,j,k) -
|
||||
> tz2*( ue_(1,4)-ue_(-1,4) )+
|
||||
> dz1tz1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
|
||||
|
||||
forcing(2,i,j,k) = forcing(2,i,j,k) - tz2 * (
|
||||
> ue_(1,2)*buf_(1,4)-ue_(-1,2)*buf_(-1,4))+
|
||||
> zzcon2*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
|
||||
> dz2tz1*( ue_(1,2)-2.0d0* ue_(0,2)+ ue_(-1,2))
|
||||
|
||||
forcing(3,i,j,k) = forcing(3,i,j,k) - tz2 * (
|
||||
> ue_(1,3)*buf_(1,4)-ue_(-1,3)*buf_(-1,4))+
|
||||
> zzcon2*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
|
||||
> dz3tz1*(ue_(1,3)-2.0d0*ue_(0,3)+ue_(-1,3))
|
||||
|
||||
forcing(4,i,j,k) = forcing(4,i,j,k) - tz2 * (
|
||||
> (ue_(1,4)*buf_(1,4)+c2*(ue_(1,5)-q_(1)))-
|
||||
> (ue_(-1,4)*buf_(-1,4)+c2*(ue_(-1,5)-q_(-1))))+
|
||||
> zzcon1*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
|
||||
> dz4tz1*( ue_(1,4)-2.0d0*ue_(0,4) +ue_(-1,4))
|
||||
|
||||
forcing(5,i,j,k) = forcing(5,i,j,k) - tz2 * (
|
||||
> buf_(1,4)*(c1*ue_(1,5)-c2*q_(1))-
|
||||
> buf_(-1,4)*(c1*ue_(-1,5)-c2*q_(-1)))+
|
||||
> 0.5d0*zzcon3*(buf_(1,1)-2.0d0*buf_(0,1)
|
||||
> +buf_(-1,1))+
|
||||
> zzcon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
|
||||
> zzcon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
|
||||
> dz5tz1*( ue_(1,5)-2.0d0*ue_(0,5)+ ue_(-1,5))
|
||||
do m = 1, 5
|
||||
if(k .eq. 1) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
|
||||
else if(k .eq. 2) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
|
||||
> 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
else if(k .eq. problem_size-3) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
|
||||
else if(k .eq. problem_size-2) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
|
||||
else
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp*
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
endif
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c now change the sign of the forcing function,
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(m)
|
||||
do k = 1, problem_size-2
|
||||
do j = 1, problem_size-2
|
||||
do i = 1, problem_size-2
|
||||
do m = 1, 5
|
||||
forcing(m,i,j,k) = -1.d0 * forcing(m,i,j,k)
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
!DVM$ end region
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,4 @@
|
||||
subroutine exact_rhs
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,18 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! this function returns the exact solution at point xi, eta, zeta
|
||||
!---------------------------------------------------------------------
|
||||
subroutine exact_solution (xi, eta, zeta, dtemp)
|
||||
|
||||
include 'header3d.h'
|
||||
double precision xi,eta,zeta,dtemp(5)
|
||||
integer m
|
||||
do m = 1,5
|
||||
dtemp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + xi * (ce(m
|
||||
&,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6) + eta * (
|
||||
&ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * (ce(m,7) +
|
||||
&zeta * (ce(m,10) + zeta * ce(m,13))))
|
||||
enddo
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,106 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
! header.h
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! The following include file is generated automatically by the
|
||||
! "setparams" utility. It defines
|
||||
! maxcells: the square root of the maximum number of processors
|
||||
! problem_size: 12, 64, 102, 162 (for class T, A, B, C)
|
||||
! dt_default: default time step for this problem size if no
|
||||
! config file
|
||||
! niter_default: default number of iterations for this problem size
|
||||
!---------------------------------------------------------------------
|
||||
|
||||
include 'npbparams.h'
|
||||
|
||||
integer aa, bb, cc, BLOCK_SIZE
|
||||
parameter (aa=1, bb=2, cc=3, BLOCK_SIZE=5)
|
||||
|
||||
integer grid_points(3)
|
||||
double precision elapsed_time
|
||||
common /global/ elapsed_time, grid_points
|
||||
|
||||
double precision tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3
|
||||
double precision dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4
|
||||
double precision dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt
|
||||
double precision ce(5,13), dxmax, dymax, dzmax, xxcon1, xxcon2
|
||||
double precision xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1
|
||||
double precision dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4
|
||||
double precision yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1
|
||||
double precision zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1
|
||||
double precision dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1
|
||||
double precision dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2
|
||||
double precision c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1
|
||||
double precision dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1
|
||||
double precision c2dtty1, c2dttz1, comz1, comz4, comz5, comz6
|
||||
double precision c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16
|
||||
integer stage_n, BL, R
|
||||
|
||||
common /constants/ tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3
|
||||
common /constants/ dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4
|
||||
common /constants/ dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt
|
||||
common /constants/ ce, dxmax, dymax, dzmax, xxcon1, xxcon2
|
||||
common /constants/ xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1
|
||||
common /constants/ dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4
|
||||
common /constants/ yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1
|
||||
common /constants/ zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1
|
||||
common /constants/ dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1
|
||||
common /constants/ dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2
|
||||
common /constants/ c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1
|
||||
common /constants/ dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1
|
||||
common /constants/ c2dtty1, c2dttz1, comz1, comz4, comz5, comz6
|
||||
common /constants/ c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16
|
||||
common /constants/ stage_n
|
||||
|
||||
integer IMAX, JMAX, KMAX
|
||||
|
||||
parameter (IMAX=problem_size,JMAX=problem_size,KMAX=problem_size)
|
||||
parameter (BL=1, R=0)
|
||||
!
|
||||
! to improve cache performance, grid dimensions padded by 1
|
||||
! for even number sizes only.
|
||||
!
|
||||
double precision us(0:IMAX/2*2, 0:JMAX/2*2, 0:KMAX/2*2)
|
||||
double precision vs(0:IMAX/2*2, 0:JMAX/2*2, 0:KMAX/2*2)
|
||||
double precision ws(0:IMAX/2*2, 0:JMAX/2*2, 0:KMAX/2*2)
|
||||
double precision qs(0:IMAX/2*2, 0:JMAX/2*2, 0:KMAX/2*2)
|
||||
double precision rho_i(0:IMAX/2*2, 0:JMAX/2*2, 0:KMAX/2*2)
|
||||
double precision square(0:IMAX/2*2, 0:JMAX/2*2, 0:KMAX/2*2)
|
||||
double precision forcing (5,0:IMAX/2*2,0:JMAX/2*2, 0:KMAX/2*2)
|
||||
double precision u(5,0:(IMAX+1)/2*2,0:(JMAX+1)/2*2,0:(KMAX+1)/2*2)
|
||||
double precision rhs(5,0:IMAX/2*2, 0:JMAX/2*2, 0:KMAX/2*2)
|
||||
double precision lhs__(5,5,0:IMAX/2*2,0:JMAX/2*2,0:KMAX/2*2/BL+R)
|
||||
common /fields/ u, us, vs, ws, qs, rho_i, square
|
||||
common /fields/ rhs, forcing, lhs__
|
||||
|
||||
double precision cv(-2:problem_size+1)
|
||||
double precision cuf(-2:problem_size+1), q(-2:problem_size+1)
|
||||
double precision ue(-2:problem_size+1,5), buf(-2:problem_size+1,5)
|
||||
common /work_1d/ cv, cuf, q, ue, buf
|
||||
|
||||
double precision tmp1, tmp2, tmp3, tmp11, tmp22
|
||||
double precision t1, t2, t3, tm1, tm2, tm3
|
||||
|
||||
common /work_lhs/ tmp1, tmp2, tmp3, tmp11, tmp22
|
||||
common /work_lhs/ t1, t2, t3, tm1, tm2, tm3
|
||||
double precision tmp_block(5,5), b_inverse(5,5), tmp_vec(5)
|
||||
common /work_solve/ tmp_block, b_inverse, tmp_vec
|
||||
!--------------------------------------------------------------------
|
||||
! FDVM Specifications
|
||||
!--------------------------------------------------------------------
|
||||
|
||||
!DVM$ DISTRIBUTE us (BLOCK,BLOCK,BLOCK)
|
||||
!DVM$ ALIGN (i,j,k) WITH us(i,j,k) :: vs, ws, qs, rho_i, square
|
||||
!DVM$ ALIGN (*,*,i,j,k) WITH us(i,j,k) :: lhs__
|
||||
!DVM$ ALIGN (*,i,j,k) WITH us(i,j,k) :: u, rhs
|
||||
!DVM$ ALIGN (*,i,j,k) WITH us(i,j,k) :: forcing
|
||||
|
||||
!DVM$ SHADOW u(2:2,2:2,2:2,2:2)
|
||||
|
||||
@@ -0,0 +1,181 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! This subroutine initializes the field variable u using
|
||||
! tri-linear transfinite interpolation of the boundary values
|
||||
!---------------------------------------------------------------------
|
||||
subroutine initialize ()
|
||||
|
||||
include 'header3d.h'
|
||||
integer i,j,k,m,ix,iy,iz
|
||||
double precision xi,eta,zeta,pface(5,3,2),pxi,peta,pzeta,temp(5),
|
||||
&xi1,yi1,zi1
|
||||
xi = 0.0
|
||||
eta = 0.0
|
||||
zeta = 0.0
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Later (in compute_rhs) we compute 1/u for every element. A few of
|
||||
! the corner elements are not used, but it convenient (and faster)
|
||||
! to compute the whole thing with a simple loop. Make sure those
|
||||
! values are nonzero by initializing the whole thing here.
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ region out(u)
|
||||
!DVM$ PARALLEL (k,j,i) ON u(*,i,j,k), SHADOW_COMPUTE, private(m)
|
||||
do k = 0,imax - 1
|
||||
do j = 0,imax - 1
|
||||
do i = 0,imax - 1
|
||||
do m = 1,5
|
||||
u(m,i,j,k) = 1.0
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ PARALLEL (k,j,i) ON u(*,i,j,k), private(m,zeta,eta, xi,ix,
|
||||
!DVM$& iy,iz,Pxi,Peta,Pzeta,Pface,xi1,yi1,zi1,temp),SHADOW_COMPUTE
|
||||
do k = 0,grid_points(3) - 1
|
||||
do j = 0,grid_points(2) - 1
|
||||
do i = 0,grid_points(1) - 1
|
||||
zeta = dble (k) * dnzm1
|
||||
eta = dble (j) * dnym1
|
||||
xi = dble (i) * dnxm1
|
||||
do ix = 1,2
|
||||
|
||||
! call exact_solution(dble(ix-1), eta, zeta, Pface(1,1,ix))
|
||||
xi1 = dble (ix - 1)
|
||||
do m = 1,5
|
||||
pface(m,1,ix) = ce(m,1) + xi1 * (ce(m,2) + xi1 * (c
|
||||
&e(m,5) + xi1 * (ce(m,8) + xi1 * ce(m,11)))) + eta * (ce(m,3) + eta
|
||||
& * (ce(m,6) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4)
|
||||
& + zeta * (ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
|
||||
enddo
|
||||
enddo
|
||||
do iy = 1,2
|
||||
|
||||
! call exact_solution(xi, dble(iy-1) , zeta, Pface(1,2,iy))
|
||||
yi1 = dble (iy - 1)
|
||||
do m = 1,5
|
||||
pface(m,2,iy) = ce(m,1) + xi * (ce(m,2) + xi * (ce(
|
||||
&m,5) + xi * (ce(m,8) + xi * ce(m,11)))) + yi1 * (ce(m,3) + yi1 * (
|
||||
&ce(m,6) + yi1 * (ce(m,9) + yi1 * ce(m,12)))) + zeta * (ce(m,4) + z
|
||||
&eta * (ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
|
||||
enddo
|
||||
enddo
|
||||
do iz = 1,2
|
||||
|
||||
! call exact_solution(xi, eta, dble(iz-1), Pface(1,3,iz))
|
||||
zi1 = dble (iz - 1)
|
||||
do m = 1,5
|
||||
pface(m,3,iz) = ce(m,1) + xi * (ce(m,2) + xi * (ce(
|
||||
&m,5) + xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (
|
||||
&ce(m,6) + eta * (ce(m,9) + eta * ce(m,12)))) + zi1 * (ce(m,4) + zi
|
||||
&1 * (ce(m,7) + zi1 * (ce(m,10) + zi1 * ce(m,13))))
|
||||
enddo
|
||||
enddo
|
||||
do m = 1,5
|
||||
pxi = xi * pface(m,1,2) + (1.0d0 - xi) * pface(m,1,1)
|
||||
peta = eta * pface(m,2,2) + (1.0d0 - eta) * pface(m,2,
|
||||
&1)
|
||||
pzeta = zeta * pface(m,3,2) + (1.0d0 - zeta) * pface(m
|
||||
&,3,1)
|
||||
u(m,i,j,k) = pxi + peta + pzeta - pxi * peta - pxi * p
|
||||
&zeta - peta * pzeta + pxi * peta * pzeta
|
||||
enddo
|
||||
|
||||
if(i .eq. 0) then
|
||||
do m = 1,5
|
||||
temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + xi
|
||||
& * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6) +
|
||||
& eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * (ce
|
||||
&(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
|
||||
enddo
|
||||
do m = 1,5
|
||||
u(m,i,j,k) = temp(m)
|
||||
enddo
|
||||
endif
|
||||
if(i .eq. grid_points(1) - 1) then
|
||||
xi = 1.0d0
|
||||
do m = 1,5
|
||||
temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + xi
|
||||
& * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6) +
|
||||
& eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * (ce
|
||||
&(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
|
||||
enddo
|
||||
do m = 1,5
|
||||
u(m,i,j,k) = temp(m)
|
||||
enddo
|
||||
endif
|
||||
|
||||
if(j .eq. 0) then
|
||||
zeta = dble (k) * dnzm1
|
||||
xi = dble (i) * dnxm1
|
||||
eta = 0.0d0
|
||||
|
||||
do m = 1,5
|
||||
temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + xi
|
||||
& * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6) +
|
||||
& eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * (ce
|
||||
&(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
|
||||
enddo
|
||||
do m = 1,5
|
||||
u(m,i,j,k) = temp(m)
|
||||
enddo
|
||||
endif
|
||||
|
||||
if(j .eq. grid_points(2) - 1) then
|
||||
zeta = dble (k) * dnzm1
|
||||
xi = dble (i) * dnxm1
|
||||
eta = 1.0d0
|
||||
! call exact_solution(xi, eta, zeta, temp)
|
||||
do m = 1,5
|
||||
temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + xi
|
||||
& * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6) +
|
||||
& eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * (ce
|
||||
&(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
|
||||
enddo
|
||||
do m = 1,5
|
||||
u(m,i,j,k) = temp(m)
|
||||
enddo
|
||||
endif
|
||||
|
||||
if(k .eq. 0) then
|
||||
zeta = 0.0d0
|
||||
xi = dble (i) * dnxm1
|
||||
eta = dble (j) * dnym1
|
||||
|
||||
! call exact_solution(xi, eta, zeta, temp)
|
||||
do m = 1,5
|
||||
temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + xi
|
||||
& * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6) +
|
||||
& eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * (ce
|
||||
&(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
|
||||
enddo
|
||||
do m = 1,5
|
||||
u(m,i,j,k) = temp(m)
|
||||
enddo
|
||||
endif
|
||||
|
||||
if(k .eq. grid_points(3) - 1) then
|
||||
zeta = 1.0d0
|
||||
xi = dble (i) * dnxm1
|
||||
eta = dble (j) * dnym1
|
||||
|
||||
! call exact_solution(xi, eta, zeta, temp)
|
||||
do m = 1,5
|
||||
temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + xi
|
||||
& * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6) +
|
||||
& eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * (ce
|
||||
&(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
|
||||
enddo
|
||||
do m = 1,5
|
||||
u(m,i,j,k) = temp(m)
|
||||
enddo
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ end region
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,58 @@
|
||||
|
||||
subroutine print_results (name, class, n1, n2, n3, niter, t, mops,
|
||||
& optype, verified, npbversion)
|
||||
|
||||
! , compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7)
|
||||
implicit none
|
||||
character*2 name
|
||||
character*1 class
|
||||
integer n1,n2,n3,niter,j
|
||||
double precision t,mops
|
||||
character optype*24,size*13
|
||||
logical verified
|
||||
character*(*) npbversion
|
||||
|
||||
! , compiletime,cs1, cs2, cs3, cs4, cs5, cs6, cs7
|
||||
write (unit = *,fmt = 2) name
|
||||
2 format(//, ' ', A2, ' Benchmark Completed.')
|
||||
write (unit = *,fmt = 3) class
|
||||
3 format(' Class = ', 12x, a12)
|
||||
|
||||
! If this is not a grid-based problem (EP, FT, CG), then
|
||||
! we only print n1, which contains some measure of the
|
||||
! problem size. In that case, n2 and n3 are both zero.
|
||||
! Otherwise, we print the grid size n1xn2xn3
|
||||
if (n2 .eq. 0 .and. n3 .eq. 0) then
|
||||
if (name(1:2) .eq. 'EP') then
|
||||
write (unit = size,fmt = '(f12.0)') 2.d0** n1
|
||||
do j = 13,1,(-(1))
|
||||
if (size(j:j) .eq. '.') size(j:j) = ' '
|
||||
enddo
|
||||
write (unit = *,fmt = 42) size
|
||||
42 format(' Size = ',12x, a14)
|
||||
else
|
||||
write (unit = *,fmt = 44) n1
|
||||
44 format(' Size = ',12x, i12)
|
||||
endif
|
||||
else
|
||||
write (unit = *,fmt = 4) n1,n2,n3
|
||||
4 format(' Size = ',12x, i3,'x',i3,'x',i3)
|
||||
endif
|
||||
write (unit = *,fmt = 5) niter
|
||||
5 format(' Iterations = ', 12x, i12)
|
||||
write (unit = *,fmt = 6) t
|
||||
6 format(' Time in seconds = ',12x, f12.2)
|
||||
write (unit = *,fmt = 9) mops
|
||||
9 format(' Mop/s total = ',12x, f12.2)
|
||||
write (unit = *,fmt = 11) optype
|
||||
11 format(' Operation type = ', a24)
|
||||
if (verified) then
|
||||
write (unit = *,fmt = 12) ' SUCCESSFUL'
|
||||
else
|
||||
write (unit = *,fmt = 12) 'UNSUCCESSFUL'
|
||||
endif
|
||||
12 format(' Verification = ', 12x, a)
|
||||
write (unit = *,fmt = 13) npbversion
|
||||
13 format(' Version = ', 12x, a12)
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,165 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine set_constants ()
|
||||
|
||||
include 'header3d.h'
|
||||
ce(1,1) = 2.0d0
|
||||
ce(1,2) = 0.0d0
|
||||
ce(1,3) = 0.0d0
|
||||
ce(1,4) = 4.0d0
|
||||
ce(1,5) = 5.0d0
|
||||
ce(1,6) = 3.0d0
|
||||
ce(1,7) = 0.5d0
|
||||
ce(1,8) = 0.02d0
|
||||
ce(1,9) = 0.01d0
|
||||
ce(1,10) = 0.03d0
|
||||
ce(1,11) = 0.5d0
|
||||
ce(1,12) = 0.4d0
|
||||
ce(1,13) = 0.3d0
|
||||
ce(2,1) = 1.0d0
|
||||
ce(2,2) = 0.0d0
|
||||
ce(2,3) = 0.0d0
|
||||
ce(2,4) = 0.0d0
|
||||
ce(2,5) = 1.0d0
|
||||
ce(2,6) = 2.0d0
|
||||
ce(2,7) = 3.0d0
|
||||
ce(2,8) = 0.01d0
|
||||
ce(2,9) = 0.03d0
|
||||
ce(2,10) = 0.02d0
|
||||
ce(2,11) = 0.4d0
|
||||
ce(2,12) = 0.3d0
|
||||
ce(2,13) = 0.5d0
|
||||
ce(3,1) = 2.0d0
|
||||
ce(3,2) = 2.0d0
|
||||
ce(3,3) = 0.0d0
|
||||
ce(3,4) = 0.0d0
|
||||
ce(3,5) = 0.0d0
|
||||
ce(3,6) = 2.0d0
|
||||
ce(3,7) = 3.0d0
|
||||
ce(3,8) = 0.04d0
|
||||
ce(3,9) = 0.03d0
|
||||
ce(3,10) = 0.05d0
|
||||
ce(3,11) = 0.3d0
|
||||
ce(3,12) = 0.5d0
|
||||
ce(3,13) = 0.4d0
|
||||
ce(4,1) = 2.0d0
|
||||
ce(4,2) = 2.0d0
|
||||
ce(4,3) = 0.0d0
|
||||
ce(4,4) = 0.0d0
|
||||
ce(4,5) = 0.0d0
|
||||
ce(4,6) = 2.0d0
|
||||
ce(4,7) = 3.0d0
|
||||
ce(4,8) = 0.03d0
|
||||
ce(4,9) = 0.05d0
|
||||
ce(4,10) = 0.04d0
|
||||
ce(4,11) = 0.2d0
|
||||
ce(4,12) = 0.1d0
|
||||
ce(4,13) = 0.3d0
|
||||
ce(5,1) = 5.0d0
|
||||
ce(5,2) = 4.0d0
|
||||
ce(5,3) = 3.0d0
|
||||
ce(5,4) = 2.0d0
|
||||
ce(5,5) = 0.1d0
|
||||
ce(5,6) = 0.4d0
|
||||
ce(5,7) = 0.3d0
|
||||
ce(5,8) = 0.05d0
|
||||
ce(5,9) = 0.04d0
|
||||
ce(5,10) = 0.03d0
|
||||
ce(5,11) = 0.1d0
|
||||
ce(5,12) = 0.3d0
|
||||
ce(5,13) = 0.2d0
|
||||
c1 = 1.4d0
|
||||
c2 = 0.4d0
|
||||
c3 = 0.1d0
|
||||
c4 = 1.0d0
|
||||
c5 = 1.4d0
|
||||
dnxm1 = 1.0d0 / dble (grid_points(1) - 1)
|
||||
dnym1 = 1.0d0 / dble (grid_points(2) - 1)
|
||||
dnzm1 = 1.0d0 / dble (grid_points(3) - 1)
|
||||
c1c2 = c1 * c2
|
||||
c1c5 = c1 * c5
|
||||
c3c4 = c3 * c4
|
||||
c1345 = c1c5 * c3c4
|
||||
conz1 = 1.0d0 - c1c5
|
||||
tx1 = 1.0d0 / (dnxm1 * dnxm1)
|
||||
tx2 = 1.0d0 / (2.0d0 * dnxm1)
|
||||
tx3 = 1.0d0 / dnxm1
|
||||
ty1 = 1.0d0 / (dnym1 * dnym1)
|
||||
ty2 = 1.0d0 / (2.0d0 * dnym1)
|
||||
ty3 = 1.0d0 / dnym1
|
||||
tz1 = 1.0d0 / (dnzm1 * dnzm1)
|
||||
tz2 = 1.0d0 / (2.0d0 * dnzm1)
|
||||
tz3 = 1.0d0 / dnzm1
|
||||
dx1 = 0.75d0
|
||||
dx2 = 0.75d0
|
||||
dx3 = 0.75d0
|
||||
dx4 = 0.75d0
|
||||
dx5 = 0.75d0
|
||||
dy1 = 0.75d0
|
||||
dy2 = 0.75d0
|
||||
dy3 = 0.75d0
|
||||
dy4 = 0.75d0
|
||||
dy5 = 0.75d0
|
||||
dz1 = 1.0d0
|
||||
dz2 = 1.0d0
|
||||
dz3 = 1.0d0
|
||||
dz4 = 1.0d0
|
||||
dz5 = 1.0d0
|
||||
dxmax = dmax1 (dx3,dx4)
|
||||
dymax = dmax1 (dy2,dy4)
|
||||
dzmax = dmax1 (dz2,dz3)
|
||||
dssp = 0.25d0 * dmax1 (dx1,dmax1 (dy1,dz1))
|
||||
c4dssp = 4.0d0 * dssp
|
||||
c5dssp = 5.0d0 * dssp
|
||||
dttx1 = dt * tx1
|
||||
dttx2 = dt * tx2
|
||||
dtty1 = dt * ty1
|
||||
dtty2 = dt * ty2
|
||||
dttz1 = dt * tz1
|
||||
dttz2 = dt * tz2
|
||||
c2dttx1 = 2.0d0 * dttx1
|
||||
c2dtty1 = 2.0d0 * dtty1
|
||||
c2dttz1 = 2.0d0 * dttz1
|
||||
dtdssp = dt * dssp
|
||||
comz1 = dtdssp
|
||||
comz4 = 4.0d0 * dtdssp
|
||||
comz5 = 5.0d0 * dtdssp
|
||||
comz6 = 6.0d0 * dtdssp
|
||||
c3c4tx3 = c3c4 * tx3
|
||||
c3c4ty3 = c3c4 * ty3
|
||||
c3c4tz3 = c3c4 * tz3
|
||||
dx1tx1 = dx1 * tx1
|
||||
dx2tx1 = dx2 * tx1
|
||||
dx3tx1 = dx3 * tx1
|
||||
dx4tx1 = dx4 * tx1
|
||||
dx5tx1 = dx5 * tx1
|
||||
dy1ty1 = dy1 * ty1
|
||||
dy2ty1 = dy2 * ty1
|
||||
dy3ty1 = dy3 * ty1
|
||||
dy4ty1 = dy4 * ty1
|
||||
dy5ty1 = dy5 * ty1
|
||||
dz1tz1 = dz1 * tz1
|
||||
dz2tz1 = dz2 * tz1
|
||||
dz3tz1 = dz3 * tz1
|
||||
dz4tz1 = dz4 * tz1
|
||||
dz5tz1 = dz5 * tz1
|
||||
c2iv = 2.5d0
|
||||
con43 = 4.0d0 / 3.0d0
|
||||
con16 = 1.0d0 / 6.0d0
|
||||
xxcon1 = c3c4tx3 * con43 * tx3
|
||||
xxcon2 = c3c4tx3 * tx3
|
||||
xxcon3 = c3c4tx3 * conz1 * tx3
|
||||
xxcon4 = c3c4tx3 * con16 * tx3
|
||||
xxcon5 = c3c4tx3 * c1c5 * tx3
|
||||
yycon1 = c3c4ty3 * con43 * ty3
|
||||
yycon2 = c3c4ty3 * ty3
|
||||
yycon3 = c3c4ty3 * conz1 * ty3
|
||||
yycon4 = c3c4ty3 * con16 * ty3
|
||||
yycon5 = c3c4ty3 * c1c5 * ty3
|
||||
zzcon1 = c3c4tz3 * con43 * tz3
|
||||
zzcon2 = c3c4tz3 * tz3
|
||||
zzcon3 = c3c4tz3 * conz1 * tz3
|
||||
zzcon4 = c3c4tz3 * con16 * tz3
|
||||
zzcon5 = c3c4tz3 * c1c5 * tz3
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,84 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine timer_clear (n)
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
integer n
|
||||
double precision start(64),elapsed(64)
|
||||
common /tt/start,elapsed
|
||||
elapsed(n) = 0.0
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine timer_start (n)
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
external elapsed_time
|
||||
double precision elapsed_time
|
||||
integer n
|
||||
double precision start(64),elapsed(64)
|
||||
common /tt/start,elapsed
|
||||
start(n) = elapsed_time ()
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine timer_stop (n)
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
external elapsed_time
|
||||
double precision elapsed_time
|
||||
integer n
|
||||
double precision start(64),elapsed(64)
|
||||
common /tt/start,elapsed
|
||||
double precision t,now
|
||||
now = elapsed_time ()
|
||||
t = now - start(n)
|
||||
elapsed(n) = elapsed(n) + t
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
double precision function timer_read (n)
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
integer n
|
||||
double precision start(64),elapsed(64)
|
||||
common /tt/start,elapsed
|
||||
timer_read = elapsed(n)
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
double precision function elapsed_time ()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
double precision t,dvtime
|
||||
integer dvm_debug
|
||||
|
||||
! dvm_debug = 0 - standard mode, dvm_debug > 0 - debugging mode
|
||||
parameter (dvm_debug = 0)
|
||||
data t/0.d0/
|
||||
t = dvtime ()
|
||||
elapsed_time = t
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,312 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! verification routine
|
||||
!---------------------------------------------------------------------
|
||||
subroutine verify (no_time_steps, class, verified)
|
||||
|
||||
include 'header3d.h'
|
||||
double precision xcrref(5),xceref(5),xcrdif(5),xcedif(5),epsilon,
|
||||
&xce(5),xcr(5),dtref
|
||||
integer m,no_time_steps
|
||||
character class
|
||||
logical verified
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! tolerance level
|
||||
!---------------------------------------------------------------------
|
||||
epsilon = 1.0d-08
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! compute the error norm and the residual norm, and exit if not printing
|
||||
!---------------------------------------------------------------------
|
||||
call error_norm(xce)
|
||||
call compute_rhs()
|
||||
call rhs_norm(xcr)
|
||||
do m = 1,5
|
||||
xcr(m) = xcr(m) / dt
|
||||
enddo
|
||||
class = 'U'
|
||||
verified = .TRUE.
|
||||
do m = 1,5
|
||||
xcrref(m) = 1.0
|
||||
xceref(m) = 1.0
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! reference data for 12X12X12 grids after 100 time steps, with DT = 1.0d-02
|
||||
!---------------------------------------------------------------------
|
||||
if (problem_size .eq. 12 .and. problem_size .eq. 12 .and. problem_
|
||||
&size .eq. 12 .and. no_time_steps .eq. 60) then
|
||||
class = 'S'
|
||||
dtref = 1.0d-2
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of residual.
|
||||
!---------------------------------------------------------------------
|
||||
xcrref(1) = 1.7034283709541311d-01
|
||||
xcrref(2) = 1.2975252070034097d-02
|
||||
xcrref(3) = 3.2527926989486055d-02
|
||||
xcrref(4) = 2.6436421275166801d-02
|
||||
xcrref(5) = 1.9211784131744430d-01
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of solution error.
|
||||
!---------------------------------------------------------------------
|
||||
xceref(1) = 4.9976913345811579d-04
|
||||
xceref(2) = 4.5195666782961927d-05
|
||||
xceref(3) = 7.3973765172921357d-05
|
||||
xceref(4) = 7.3821238632439731d-05
|
||||
xceref(5) = 8.9269630987491446d-04
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! reference data for 24X24X24 grids after 200 time steps, with DT = 0.8d-3
|
||||
!---------------------------------------------------------------------
|
||||
else if (problem_size .eq. 24 .and. problem_size .eq. 24 .and. pro
|
||||
&blem_size .eq. 24 .and. no_time_steps .eq. 200) then
|
||||
class = 'W'
|
||||
dtref = 0.8d-3
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of residual.
|
||||
!---------------------------------------------------------------------
|
||||
xcrref(1) = 0.1125590409344d+03
|
||||
xcrref(2) = 0.1180007595731d+02
|
||||
xcrref(3) = 0.2710329767846d+02
|
||||
xcrref(4) = 0.2469174937669d+02
|
||||
xcrref(5) = 0.2638427874317d+03
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of solution error.
|
||||
!---------------------------------------------------------------------
|
||||
xceref(1) = 0.4419655736008d+01
|
||||
xceref(2) = 0.4638531260002d+00
|
||||
xceref(3) = 0.1011551749967d+01
|
||||
xceref(4) = 0.9235878729944d+00
|
||||
xceref(5) = 0.1018045837718d+02
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! reference data for 64X64X64 grids after 200 time steps, with DT = 0.8d-3
|
||||
!---------------------------------------------------------------------
|
||||
else if (problem_size .eq. 64 .and. problem_size .eq. 64 .and. pro
|
||||
&blem_size .eq. 64 .and. no_time_steps .eq. 200) then
|
||||
class = 'A'
|
||||
dtref = 0.8d-3
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of residual.
|
||||
!---------------------------------------------------------------------
|
||||
xcrref(1) = 1.0806346714637264d+02
|
||||
xcrref(2) = 1.1319730901220813d+01
|
||||
xcrref(3) = 2.5974354511582465d+01
|
||||
xcrref(4) = 2.3665622544678910d+01
|
||||
xcrref(5) = 2.5278963211748344d+02
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of solution error.
|
||||
!---------------------------------------------------------------------
|
||||
xceref(1) = 4.2348416040525025d+00
|
||||
xceref(2) = 4.4390282496995698d-01
|
||||
xceref(3) = 9.6692480136345650d-01
|
||||
xceref(4) = 8.8302063039765474d-01
|
||||
xceref(5) = 9.7379901770829278d+00
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! reference data for 102X102X102 grids after 200 time steps,
|
||||
! with DT = 3.0d-04
|
||||
!---------------------------------------------------------------------
|
||||
else if (problem_size .eq. 102 .and. problem_size .eq. 102 .and. p
|
||||
&roblem_size .eq. 102 .and. no_time_steps .eq. 200) then
|
||||
class = 'B'
|
||||
dtref = 3.0d-4
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of residual.
|
||||
!---------------------------------------------------------------------
|
||||
xcrref(1) = 1.4233597229287254d+03
|
||||
xcrref(2) = 9.9330522590150238d+01
|
||||
xcrref(3) = 3.5646025644535285d+02
|
||||
xcrref(4) = 3.2485447959084092d+02
|
||||
xcrref(5) = 3.2707541254659363d+03
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of solution error.
|
||||
!---------------------------------------------------------------------
|
||||
xceref(1) = 5.2969847140936856d+01
|
||||
xceref(2) = 4.4632896115670668d+00
|
||||
xceref(3) = 1.3122573342210174d+01
|
||||
xceref(4) = 1.2006925323559144d+01
|
||||
xceref(5) = 1.2459576151035986d+02
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! reference data for 162X162X162 grids after 200 time steps,
|
||||
! with DT = 1.0d-04
|
||||
!---------------------------------------------------------------------
|
||||
else if (problem_size .eq. 162 .and. problem_size .eq. 162 .and. p
|
||||
&roblem_size .eq. 162 .and. no_time_steps .eq. 200) then
|
||||
class = 'C'
|
||||
dtref = 1.0d-4
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of residual.
|
||||
!---------------------------------------------------------------------
|
||||
xcrref(1) = 0.62398116551764615d+04
|
||||
xcrref(2) = 0.50793239190423964d+03
|
||||
xcrref(3) = 0.15423530093013596d+04
|
||||
xcrref(4) = 0.13302387929291190d+04
|
||||
xcrref(5) = 0.11604087428436455d+05
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of solution error.
|
||||
!---------------------------------------------------------------------
|
||||
xceref(1) = 0.16462008369091265d+03
|
||||
xceref(2) = 0.11497107903824313d+02
|
||||
xceref(3) = 0.41207446207461508d+02
|
||||
xceref(4) = 0.37087651059694167d+02
|
||||
xceref(5) = 0.36211053051841265d+03
|
||||
!---------------------------------------------------------------------
|
||||
! reference data for 408x408x408 grids after 250 time steps, with DT = 0.2d-04
|
||||
!---------------------------------------------------------------------
|
||||
elseif ( (grid_points(1) .eq. 408) .and.
|
||||
& (grid_points(2) .eq. 408) .and.
|
||||
& (grid_points(3) .eq. 408) .and.
|
||||
& (no_time_steps . eq. 250) ) then
|
||||
|
||||
class = 'D'
|
||||
dtref = 0.2d-4
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of residual.
|
||||
!---------------------------------------------------------------------
|
||||
xcrref(1) = 0.2533188551738d+05
|
||||
xcrref(2) = 0.2346393716980d+04
|
||||
xcrref(3) = 0.6294554366904d+04
|
||||
xcrref(4) = 0.5352565376030d+04
|
||||
xcrref(5) = 0.3905864038618d+05
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of solution error.
|
||||
!---------------------------------------------------------------------
|
||||
|
||||
xceref(1) = 0.3100009377557d+03
|
||||
xceref(2) = 0.2424086324913d+02
|
||||
xceref(3) = 0.7782212022645d+02
|
||||
xceref(4) = 0.6835623860116d+02
|
||||
xceref(5) = 0.6065737200368d+03
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! reference data for 1020x1020x1020 grids after 250 time steps, with DT = 0.4d-05
|
||||
!---------------------------------------------------------------------
|
||||
elseif ( (grid_points(1) .eq. 1020) .and.
|
||||
& (grid_points(2) .eq. 1020) .and.
|
||||
& (grid_points(3) .eq. 1020) .and.
|
||||
& (no_time_steps . eq. 250) ) then
|
||||
|
||||
class = 'E'
|
||||
dtref = 0.4d-5
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of residual.
|
||||
!---------------------------------------------------------------------
|
||||
xcrref(1) = 0.9795372484517d+05
|
||||
xcrref(2) = 0.9739814511521d+04
|
||||
xcrref(3) = 0.2467606342965d+05
|
||||
xcrref(4) = 0.2092419572860d+05
|
||||
xcrref(5) = 0.1392138856939d+06
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of solution error.
|
||||
!---------------------------------------------------------------------
|
||||
|
||||
xceref(1) = 0.4327562208414d+03
|
||||
xceref(2) = 0.3699051964887d+02
|
||||
xceref(3) = 0.1089845040954d+03
|
||||
xceref(4) = 0.9462517622043d+02
|
||||
xceref(5) = 0.7765512765309d+03
|
||||
|
||||
else
|
||||
verified = .FALSE.
|
||||
endif
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! verification test for residuals if gridsize is either 12X12X12 or
|
||||
! 64X64X64 or 102X102X102 or 162X162X162
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! Compute the difference of solution values and the known reference values.
|
||||
!---------------------------------------------------------------------
|
||||
do m = 1,5
|
||||
xcrdif(m) = dabs ((xcr(m) - xcrref(m)) / xcrref(m))
|
||||
xcedif(m) = dabs ((xce(m) - xceref(m)) / xceref(m))
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Output the comparison of computed results to known cases.
|
||||
!---------------------------------------------------------------------
|
||||
if (class .ne. 'U') then
|
||||
write (unit = *,fmt = 1990) class
|
||||
1990 format(' Verification being performed for class ', a)
|
||||
write (unit = *,fmt = 2000) epsilon
|
||||
2000 format(' accuracy setting for epsilon = ', E20.13)
|
||||
if (dabs (dt - dtref) .gt. epsilon) then
|
||||
verified = .FALSE.
|
||||
class = 'U'
|
||||
write (unit = *,fmt = 1000) dtref
|
||||
1000 format(' DT does not match the reference value of ',
|
||||
& E15.8)
|
||||
endif
|
||||
else
|
||||
write (unit = *,fmt = 1995)
|
||||
1995 format(' Unknown class')
|
||||
endif
|
||||
if (class .ne. 'U') then
|
||||
write (unit = *,fmt = 2001)
|
||||
else
|
||||
write (unit = *,fmt = 2005)
|
||||
endif
|
||||
2001 format(' Comparison of RMS-norms of residual')
|
||||
2005 format(' RMS-norms of residual')
|
||||
do m = 1,5
|
||||
if (class .eq. 'U') then
|
||||
write (unit = *,fmt = 2015) m,xcr(m)
|
||||
else if (xcrdif(m) .gt. epsilon .or. isnan(xcrdif(m)))then
|
||||
verified = .FALSE.
|
||||
write (unit = *,fmt = 2010) m,xcr(m),xcrref(m),xcrdif(m)
|
||||
else
|
||||
write (unit = *,fmt = 2011) m,xcr(m),xcrref(m),xcrdif(m)
|
||||
endif
|
||||
enddo
|
||||
if (class .ne. 'U') then
|
||||
write (unit = *,fmt = 2002)
|
||||
else
|
||||
write (unit = *,fmt = 2006)
|
||||
endif
|
||||
2002 format(' Comparison of RMS-norms of solution error')
|
||||
2006 format(' RMS-norms of solution error')
|
||||
do m = 1,5
|
||||
if (class .eq. 'U') then
|
||||
write (unit = *,fmt = 2015) m,xce(m)
|
||||
else if (xcedif(m) .gt. epsilon .or. isnan(xcedif(m))) then
|
||||
verified = .FALSE.
|
||||
write (unit = *,fmt = 2010) m,xce(m),xceref(m),xcedif(m)
|
||||
else
|
||||
write (unit = *,fmt = 2011) m,xce(m),xceref(m),xcedif(m)
|
||||
endif
|
||||
enddo
|
||||
2010 format(' FAILURE: ', i2, E20.13, E20.13, E20.13)
|
||||
2011 format(' ', i2, E20.13, E20.13, E20.13)
|
||||
2015 format(' ', i2, E20.13)
|
||||
if (class .eq. 'U') then
|
||||
write (unit = *,fmt = 2022)
|
||||
write (unit = *,fmt = 2023)
|
||||
2022 format(' No reference values provided')
|
||||
2023 format(' No verification performed')
|
||||
else if (verified) then
|
||||
write (unit = *,fmt = 2020)
|
||||
2020 format(' Verification Successful')
|
||||
else
|
||||
write (unit = *,fmt = 2021)
|
||||
2021 format(' Verification failed')
|
||||
endif
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,627 @@
|
||||
!---------------------------------------------------------------------
|
||||
! performs guaussian elimination on this cell.
|
||||
!
|
||||
! assumes that unpacking routines for non-first cells
|
||||
! preload C' and rhs' from previous cell.
|
||||
!
|
||||
! assumed send happens outside this routine, but that
|
||||
! c'(IMAX) and rhs'(IMAX) will be sent to next cell
|
||||
!---------------------------------------------------------------------
|
||||
subroutine x_solve ()
|
||||
|
||||
include 'header3d.h'
|
||||
double precision pivot,coeff
|
||||
integer i__0,j__1
|
||||
integer m,n
|
||||
double precision coeff__2
|
||||
double precision pivot__3
|
||||
double precision lhs_(5,5,3),u_(0:3,5)
|
||||
double precision rhs_(5)
|
||||
integer i,j,k,isize
|
||||
isize = problem_size - 1
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! outer most do loops - sweeping in i direction
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! begin inner most do loop
|
||||
! do all the elements of the cell unless last
|
||||
!---------------------------------------------------------------------
|
||||
!, ACROSS(rhs(1:0,0:0,0:0,0:0),lhs__(1:0,0:0,0:0,0:0,0:0))
|
||||
!DVM$ region local(lhs__)
|
||||
!DVM$ PARALLEL (k,j) ON rhs(*,*,j,k),private(u_,i,rhs_,tmp1,tmp2,
|
||||
!DVM$&tmp3,t1,t2,t3,tm1,tm2,pivot,coeff, tm3,i__0,j__1,tmp11,
|
||||
!DVM$&tmp22,lhs_,m,n,coeff__2, pivot__3)
|
||||
do k = 1,problem_size - 2
|
||||
do j = 1,problem_size - 2
|
||||
do m = 1,5
|
||||
u_(0,m) = u(m,0,j,k)
|
||||
u_(1,m) = u(m,1,j,k)
|
||||
enddo
|
||||
do i = 1,isize - 1
|
||||
do m = 1,5
|
||||
u_(2,m) = u(m,i + 1,j,k)
|
||||
enddo
|
||||
|
||||
! if(i .ne. isize) then
|
||||
tmp1 = 1.0d+00 / u_(1,1)
|
||||
tmp2 = tmp1 * tmp1
|
||||
tmp3 = tmp1 * tmp2
|
||||
t1 = 1.0d+00 / u_(0,1)
|
||||
t2 = t1 * t1
|
||||
t3 = t1 * t2
|
||||
tm1 = 1.0d+00 / u_(2,1)
|
||||
tm2 = tm1 * tm1
|
||||
tm3 = tm1 * tm2
|
||||
tmp11 = dt * tx1
|
||||
tmp22 = dt * tx2
|
||||
lhs_(1,1,1) = (-(tmp11)) * dx1
|
||||
lhs_(1,2,1) = (-(tmp22))
|
||||
lhs_(1,3,1) = 0.
|
||||
lhs_(1,4,1) = 0.
|
||||
lhs_(1,5,1) = 0.
|
||||
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * t2 * u_(0,2))) +
|
||||
& c2 * 0.50d+00 * (u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
|
||||
&* u_(0,4)) * t2) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,2))
|
||||
lhs_(2,2,1) = (-(tmp22)) * ((2.0d+00 - c2) * (u_(0,2) / u
|
||||
&_(0,1))) - tmp11 * con43 * c3c4 * t1 - tmp11 * dx2
|
||||
lhs_(2,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * t1))
|
||||
lhs_(2,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,4) * t1))
|
||||
lhs_(2,5,1) = (-(tmp22)) * c2
|
||||
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,3))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,3))
|
||||
lhs_(3,2,1) = (-(tmp22)) * u_(0,3) * t1
|
||||
lhs_(3,3,1) = (-(tmp22)) * u_(0,2) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dx3
|
||||
lhs_(3,4,1) = 0.
|
||||
lhs_(3,5,1) = 0.
|
||||
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,4))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,4))
|
||||
lhs_(4,2,1) = (-(tmp22)) * u_(0,4) * t1
|
||||
lhs_(4,3,1) = 0.
|
||||
lhs_(4,4,1) = (-(tmp22)) * u_(0,2) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dx4
|
||||
lhs_(4,5,1) = 0.
|
||||
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
|
||||
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * (u_(0,5) * t1)) *
|
||||
& (u_(0,2) * t1)) - tmp11 * ((-(con43 * c3c4 - c1345)) * t3 * u_(0,
|
||||
&2)** 2 - (c3c4 - c1345) * t3 * u_(0,3)** 2 - (c3c4 - c1345) * t3 *
|
||||
& u_(0,4)** 2 - c1345 * t2 * u_(0,5))
|
||||
lhs_(5,2,1) = (-(tmp22)) * (c1 * u_(0,5) * t1 - 0.50d+00
|
||||
&* c2 * (3.0d+00 * u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
|
||||
&* u_(0,4)) * t2) - tmp11 * ((con43 * c3c4 - c1345) * t2 * u_(0,2))
|
||||
lhs_(5,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,2))
|
||||
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,3)
|
||||
lhs_(5,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,4) * u_(0,2))
|
||||
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,4)
|
||||
lhs_(5,5,1) = (-(tmp22)) * c1 * (u_(0,2) * t1) - tmp11 *
|
||||
&c1345 * t1 - tmp11 * dx5
|
||||
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dx1
|
||||
lhs_(1,2,2) = 0.
|
||||
lhs_(1,3,2) = 0.
|
||||
lhs_(1,4,2) = 0.
|
||||
lhs_(1,5,2) = 0.
|
||||
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
|
||||
& * u_(1,2))
|
||||
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3c4 *
|
||||
&tmp1 + tmp11 * 2.0d+00 * dx2
|
||||
lhs_(2,3,2) = 0.
|
||||
lhs_(2,4,2) = 0.
|
||||
lhs_(2,5,2) = 0.
|
||||
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&3))
|
||||
lhs_(3,2,2) = 0.
|
||||
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dx3
|
||||
lhs_(3,4,2) = 0.
|
||||
lhs_(3,5,2) = 0.
|
||||
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&4))
|
||||
lhs_(4,2,2) = 0.
|
||||
lhs_(4,3,2) = 0.
|
||||
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dx4
|
||||
lhs_(4,5,2) = tmp11 * 2.0d+00 * 0
|
||||
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(con43 * c3c4 - c1345)
|
||||
&) * tmp3 * u_(1,2)** 2 - (c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (c3
|
||||
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
|
||||
lhs_(5,2,2) = tmp11 * 2.0d+00 * ((con43 * c3c4 - c1345) *
|
||||
& tmp2 * u_(1,2))
|
||||
lhs_(5,3,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,3)
|
||||
lhs_(5,4,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,4)
|
||||
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
|
||||
&tmp11 * 2.0d+00 * dx5
|
||||
if (i .ne. 1) then
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
enddo
|
||||
endif
|
||||
lhs_(1,1,3) = (-(tmp11)) * dx1
|
||||
lhs_(1,2,3) = tmp22
|
||||
lhs_(1,3,3) = 0.
|
||||
lhs_(1,4,3) = 0.
|
||||
lhs_(1,5,3) = 0.
|
||||
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * tm2 * u_(2,2))) + c2
|
||||
&* 0.50d+00 * (u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u_
|
||||
&(2,4)) * tm2) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,2))
|
||||
lhs_(2,2,3) = tmp22 * ((2.0d+00 - c2) * (u_(2,2) / u_(2,1
|
||||
&))) - tmp11 * con43 * c3c4 * tm1 - tmp11 * dx2
|
||||
lhs_(2,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * tm1))
|
||||
lhs_(2,4,3) = tmp22 * ((-(c2)) * (u_(2,4) * tm1))
|
||||
lhs_(2,5,3) = tmp22 * c2
|
||||
lhs_(3,1,3) = tmp22 * ((-(u_(2,2) * u_(2,3))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,3))
|
||||
lhs_(3,2,3) = tmp22 * u_(2,3) * tm1
|
||||
lhs_(3,3,3) = tmp22 * u_(2,2) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dx3
|
||||
lhs_(3,4,3) = 0.
|
||||
lhs_(3,5,3) = 0.
|
||||
lhs_(4,1,3) = tmp22 * ((-(u_(2,2) * u_(2,4))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,4))
|
||||
lhs_(4,2,3) = tmp22 * u_(2,4) * tm1
|
||||
lhs_(4,3,3) = 0.
|
||||
lhs_(4,4,3) = tmp22 * u_(2,2) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dx4
|
||||
lhs_(4,5,3) = 0.
|
||||
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
|
||||
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * (u_(2,5) * tm1)) * (u
|
||||
&_(2,2) * tm1)) - tmp11 * ((-(con43 * c3c4 - c1345)) * tm3 * u_(2,2
|
||||
&)** 2 - (c3c4 - c1345) * tm3 * u_(2,3)** 2 - (c3c4 - c1345) * tm3
|
||||
&* u_(2,4)** 2 - c1345 * tm2 * u_(2,5))
|
||||
lhs_(5,2,3) = tmp22 * (c1 * u_(2,5) * tm1 - 0.50d+00 * c2
|
||||
& * (3.0d+00 * u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u_
|
||||
&(2,4)) * tm2) - tmp11 * ((con43 * c3c4 - c1345) * tm2 * u_(2,2))
|
||||
lhs_(5,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,2)) * tm
|
||||
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,3)
|
||||
lhs_(5,4,3) = tmp22 * ((-(c2)) * (u_(2,4) * u_(2,2)) * tm
|
||||
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,4)
|
||||
lhs_(5,5,3) = tmp22 * c1 * (u_(2,2) * tm1) - tmp11 * c134
|
||||
&5 * tm1 - tmp11 * dx5
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i - 1
|
||||
&,j,k) - lhs_(i__0,2,1) * rhs(2,i - 1,j,k) - lhs_(i__0,3,1) * rhs(3
|
||||
&,i - 1,j,k) - lhs_(i__0,4,1) * rhs(4,i - 1,j,k) - lhs_(i__0,5,1) *
|
||||
& rhs(5,i - 1,j,k)
|
||||
enddo
|
||||
pivot = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot
|
||||
lhs_(1,1,3) = lhs_(1,1,3) * pivot
|
||||
lhs_(1,2,3) = lhs_(1,2,3) * pivot
|
||||
lhs_(1,3,3) = lhs_(1,3,3) * pivot
|
||||
lhs_(1,4,3) = lhs_(1,4,3) * pivot
|
||||
lhs_(1,5,3) = lhs_(1,5,3) * pivot
|
||||
rhs_(1) = rhs_(1) * pivot
|
||||
coeff = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(1)
|
||||
coeff = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(1)
|
||||
coeff = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(1)
|
||||
coeff = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(1)
|
||||
pivot = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot
|
||||
lhs_(2,1,3) = lhs_(2,1,3) * pivot
|
||||
lhs_(2,2,3) = lhs_(2,2,3) * pivot
|
||||
lhs_(2,3,3) = lhs_(2,3,3) * pivot
|
||||
lhs_(2,4,3) = lhs_(2,4,3) * pivot
|
||||
lhs_(2,5,3) = lhs_(2,5,3) * pivot
|
||||
rhs_(2) = rhs_(2) * pivot
|
||||
coeff = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(2)
|
||||
coeff = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(2)
|
||||
coeff = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(2)
|
||||
coeff = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(2)
|
||||
pivot = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot
|
||||
lhs_(3,1,3) = lhs_(3,1,3) * pivot
|
||||
lhs_(3,2,3) = lhs_(3,2,3) * pivot
|
||||
lhs_(3,3,3) = lhs_(3,3,3) * pivot
|
||||
lhs_(3,4,3) = lhs_(3,4,3) * pivot
|
||||
lhs_(3,5,3) = lhs_(3,5,3) * pivot
|
||||
rhs_(3) = rhs_(3) * pivot
|
||||
coeff = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(3)
|
||||
coeff = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(3)
|
||||
coeff = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(3)
|
||||
coeff = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(3)
|
||||
pivot = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot
|
||||
lhs_(4,1,3) = lhs_(4,1,3) * pivot
|
||||
lhs_(4,2,3) = lhs_(4,2,3) * pivot
|
||||
lhs_(4,3,3) = lhs_(4,3,3) * pivot
|
||||
lhs_(4,4,3) = lhs_(4,4,3) * pivot
|
||||
lhs_(4,5,3) = lhs_(4,5,3) * pivot
|
||||
rhs_(4) = rhs_(4) * pivot
|
||||
coeff = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(4)
|
||||
coeff = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(4)
|
||||
coeff = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(4)
|
||||
coeff = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(4)
|
||||
pivot = 1.00d0 / lhs_(5,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) * pivot
|
||||
lhs_(5,2,3) = lhs_(5,2,3) * pivot
|
||||
lhs_(5,3,3) = lhs_(5,3,3) * pivot
|
||||
lhs_(5,4,3) = lhs_(5,4,3) * pivot
|
||||
lhs_(5,5,3) = lhs_(5,5,3) * pivot
|
||||
rhs_(5) = rhs_(5) * pivot
|
||||
coeff = lhs_(1,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(5)
|
||||
coeff = lhs_(2,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(5)
|
||||
coeff = lhs_(3,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(5)
|
||||
coeff = lhs_(4,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(5)
|
||||
do i__0 = 1,5
|
||||
lhs__(i__0,1,i,j,k) = lhs_(i__0,1,3)
|
||||
lhs__(i__0,2,i,j,k) = lhs_(i__0,2,3)
|
||||
lhs__(i__0,3,i,j,k) = lhs_(i__0,3,3)
|
||||
lhs__(i__0,4,i,j,k) = lhs_(i__0,4,3)
|
||||
lhs__(i__0,5,i,j,k) = lhs_(i__0,5,3)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
u_(0,m) = u_(1,m)
|
||||
u_(1,m) = u_(2,m)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
! else ! ******************* else case *************************
|
||||
do n = 1,5
|
||||
lhs_(1,n,1) = 0.0d0
|
||||
lhs_(1,n,2) = 0.0d0
|
||||
lhs_(1,n,3) = 0.0d0
|
||||
lhs_(2,n,1) = 0.0d0
|
||||
lhs_(2,n,2) = 0.0d0
|
||||
lhs_(2,n,3) = 0.0d0
|
||||
lhs_(3,n,1) = 0.0d0
|
||||
lhs_(3,n,2) = 0.0d0
|
||||
lhs_(3,n,3) = 0.0d0
|
||||
lhs_(4,n,1) = 0.0d0
|
||||
lhs_(4,n,2) = 0.0d0
|
||||
lhs_(4,n,3) = 0.0d0
|
||||
lhs_(5,n,1) = 0.0d0
|
||||
lhs_(5,n,2) = 0.0d0
|
||||
lhs_(5,n,3) = 0.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
lhs_(m,m,2) = 1.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i - 1,j,
|
||||
&k) - lhs_(i__0,2,1) * rhs(2,i - 1,j,k) - lhs_(i__0,3,1) * rhs(3,i
|
||||
&- 1,j,k) - lhs_(i__0,4,1) * rhs(4,i - 1,j,k) - lhs_(i__0,5,1) * rh
|
||||
&s(5,i - 1,j,k)
|
||||
enddo
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
pivot__3 = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
|
||||
rhs_(1) = rhs_(1) * pivot__3
|
||||
coeff__2 = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
|
||||
pivot__3 = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
|
||||
rhs_(2) = rhs_(2) * pivot__3
|
||||
coeff__2 = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
|
||||
pivot__3 = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
|
||||
rhs_(3) = rhs_(3) * pivot__3
|
||||
coeff__2 = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
|
||||
pivot__3 = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
|
||||
rhs_(4) = rhs_(4) * pivot__3
|
||||
coeff__2 = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
|
||||
pivot__3 = 1.00d0 / lhs_(5,5,2)
|
||||
rhs_(5) = rhs_(5) * pivot__3
|
||||
coeff__2 = lhs_(1,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(2,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(3,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(4,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
|
||||
|
||||
! endif
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
|
||||
! enddo
|
||||
do i = problem_size - 2,0,(-(1))
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k) * rhs(1,i + 1,j,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k) * rhs(2,i + 1,j,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k) * rhs(3,i + 1,j,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k) * rhs(4,i + 1,j,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k) * rhs(5,i + 1,j,k)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,640 @@
|
||||
!---------------------------------------------------------------------
|
||||
! performs guaussian elimination on this cell.
|
||||
!
|
||||
! assumes that unpacking routines for non-first cells
|
||||
! preload C' and rhs' from previous cell.
|
||||
!
|
||||
! assumed send happens outside this routine, but that
|
||||
! c'(IMAX) and rhs'(IMAX) will be sent to next cell
|
||||
!---------------------------------------------------------------------
|
||||
subroutine x_solve ()
|
||||
|
||||
include 'header3d.h'
|
||||
double precision pivot,coeff
|
||||
integer i__0,j__1
|
||||
integer m,n
|
||||
double precision coeff__2
|
||||
double precision pivot__3
|
||||
double precision lhs_(5,5,3),u_(0:3,5)
|
||||
double precision rhs_(5)
|
||||
integer i,j,k,isize,low_k,high_k,k1,maxBlK
|
||||
isize = problem_size - 1
|
||||
if(mod((problem_size - 2), BL) .eq. 0) then
|
||||
maxBlK = (problem_size - 2) / BL
|
||||
else
|
||||
maxblK = (problem_size - 2) / BL + 1
|
||||
endif
|
||||
!---------------------------------------------------------------------
|
||||
! outer most do loops - sweeping in i direction
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! begin inner most do loop
|
||||
! do all the elements of the cell unless last
|
||||
!---------------------------------------------------------------------
|
||||
!, ACROSS(rhs(1:0,0:0,0:0,0:0),lhs__(1:0,0:0,0:0,0:0,0:0))
|
||||
!DVM$ region local(lhs__)
|
||||
!DVM$ PARALLEL (k1, j) ON rhs(*,*,j,k1),private(u_,i,rhs_,tmp1,tmp2,
|
||||
!DVM$&tmp3,t1,t2,t3,tm1,tm2,pivot,coeff, tm3,i__0,j__1,tmp11,
|
||||
!DVM$&tmp22,lhs_,m,n,coeff__2, pivot__3,low_k,high_k,k),
|
||||
!DVM$&cuda_block(32)
|
||||
do k1 = 1, maxblK
|
||||
do j = 1, problem_size - 2
|
||||
low_k = (k1 - 1) * BL + 1
|
||||
high_k = k1 * BL
|
||||
if(high_k .gt. problem_size - 2) then
|
||||
high_k = problem_size - 2
|
||||
endif
|
||||
do k = low_k, high_k
|
||||
do m = 1,5
|
||||
u_(0,m) = u(m,0,j,k)
|
||||
u_(1,m) = u(m,1,j,k)
|
||||
enddo
|
||||
do i = 1,isize - 1
|
||||
do m = 1,5
|
||||
u_(2,m) = u(m,i + 1,j,k)
|
||||
enddo
|
||||
|
||||
! if(i .ne. isize) then
|
||||
tmp1 = 1.0d+00 / u_(1,1)
|
||||
tmp2 = tmp1 * tmp1
|
||||
tmp3 = tmp1 * tmp2
|
||||
t1 = 1.0d+00 / u_(0,1)
|
||||
t2 = t1 * t1
|
||||
t3 = t1 * t2
|
||||
tm1 = 1.0d+00 / u_(2,1)
|
||||
tm2 = tm1 * tm1
|
||||
tm3 = tm1 * tm2
|
||||
tmp11 = dt * tx1
|
||||
tmp22 = dt * tx2
|
||||
lhs_(1,1,1) = (-(tmp11)) * dx1
|
||||
lhs_(1,2,1) = (-(tmp22))
|
||||
lhs_(1,3,1) = 0.
|
||||
lhs_(1,4,1) = 0.
|
||||
lhs_(1,5,1) = 0.
|
||||
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * t2 * u_(0,2))) +
|
||||
& c2 * 0.50d+00 * (u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
|
||||
&* u_(0,4)) * t2) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,2))
|
||||
lhs_(2,2,1) = (-(tmp22)) * ((2.0d+00 - c2) * (u_(0,2) / u
|
||||
&_(0,1))) - tmp11 * con43 * c3c4 * t1 - tmp11 * dx2
|
||||
lhs_(2,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * t1))
|
||||
lhs_(2,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,4) * t1))
|
||||
lhs_(2,5,1) = (-(tmp22)) * c2
|
||||
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,3))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,3))
|
||||
lhs_(3,2,1) = (-(tmp22)) * u_(0,3) * t1
|
||||
lhs_(3,3,1) = (-(tmp22)) * u_(0,2) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dx3
|
||||
lhs_(3,4,1) = 0.
|
||||
lhs_(3,5,1) = 0.
|
||||
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,4))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,4))
|
||||
lhs_(4,2,1) = (-(tmp22)) * u_(0,4) * t1
|
||||
lhs_(4,3,1) = 0.
|
||||
lhs_(4,4,1) = (-(tmp22)) * u_(0,2) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dx4
|
||||
lhs_(4,5,1) = 0.
|
||||
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
|
||||
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * (u_(0,5) * t1)) *
|
||||
& (u_(0,2) * t1)) - tmp11 * ((-(con43 * c3c4 - c1345)) * t3 * u_(0,
|
||||
&2)** 2 - (c3c4 - c1345) * t3 * u_(0,3)** 2 - (c3c4 - c1345) * t3 *
|
||||
& u_(0,4)** 2 - c1345 * t2 * u_(0,5))
|
||||
lhs_(5,2,1) = (-(tmp22)) * (c1 * u_(0,5) * t1 - 0.50d+00
|
||||
&* c2 * (3.0d+00 * u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
|
||||
&* u_(0,4)) * t2) - tmp11 * ((con43 * c3c4 - c1345) * t2 * u_(0,2))
|
||||
lhs_(5,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,2))
|
||||
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,3)
|
||||
lhs_(5,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,4) * u_(0,2))
|
||||
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,4)
|
||||
lhs_(5,5,1) = (-(tmp22)) * c1 * (u_(0,2) * t1) - tmp11 *
|
||||
&c1345 * t1 - tmp11 * dx5
|
||||
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dx1
|
||||
lhs_(1,2,2) = 0.
|
||||
lhs_(1,3,2) = 0.
|
||||
lhs_(1,4,2) = 0.
|
||||
lhs_(1,5,2) = 0.
|
||||
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
|
||||
& * u_(1,2))
|
||||
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3c4 *
|
||||
&tmp1 + tmp11 * 2.0d+00 * dx2
|
||||
lhs_(2,3,2) = 0.
|
||||
lhs_(2,4,2) = 0.
|
||||
lhs_(2,5,2) = 0.
|
||||
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&3))
|
||||
lhs_(3,2,2) = 0.
|
||||
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dx3
|
||||
lhs_(3,4,2) = 0.
|
||||
lhs_(3,5,2) = 0.
|
||||
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&4))
|
||||
lhs_(4,2,2) = 0.
|
||||
lhs_(4,3,2) = 0.
|
||||
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dx4
|
||||
lhs_(4,5,2) = tmp11 * 2.0d+00 * 0
|
||||
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(con43 * c3c4 - c1345)
|
||||
&) * tmp3 * u_(1,2)** 2 - (c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (c3
|
||||
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
|
||||
lhs_(5,2,2) = tmp11 * 2.0d+00 * ((con43 * c3c4 - c1345) *
|
||||
& tmp2 * u_(1,2))
|
||||
lhs_(5,3,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,3)
|
||||
lhs_(5,4,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,4)
|
||||
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
|
||||
&tmp11 * 2.0d+00 * dx5
|
||||
if (i .ne. 1) then
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
enddo
|
||||
endif
|
||||
lhs_(1,1,3) = (-(tmp11)) * dx1
|
||||
lhs_(1,2,3) = tmp22
|
||||
lhs_(1,3,3) = 0.
|
||||
lhs_(1,4,3) = 0.
|
||||
lhs_(1,5,3) = 0.
|
||||
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * tm2 * u_(2,2))) + c2
|
||||
&* 0.50d+00 * (u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u_
|
||||
&(2,4)) * tm2) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,2))
|
||||
lhs_(2,2,3) = tmp22 * ((2.0d+00 - c2) * (u_(2,2) / u_(2,1
|
||||
&))) - tmp11 * con43 * c3c4 * tm1 - tmp11 * dx2
|
||||
lhs_(2,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * tm1))
|
||||
lhs_(2,4,3) = tmp22 * ((-(c2)) * (u_(2,4) * tm1))
|
||||
lhs_(2,5,3) = tmp22 * c2
|
||||
lhs_(3,1,3) = tmp22 * ((-(u_(2,2) * u_(2,3))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,3))
|
||||
lhs_(3,2,3) = tmp22 * u_(2,3) * tm1
|
||||
lhs_(3,3,3) = tmp22 * u_(2,2) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dx3
|
||||
lhs_(3,4,3) = 0.
|
||||
lhs_(3,5,3) = 0.
|
||||
lhs_(4,1,3) = tmp22 * ((-(u_(2,2) * u_(2,4))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,4))
|
||||
lhs_(4,2,3) = tmp22 * u_(2,4) * tm1
|
||||
lhs_(4,3,3) = 0.
|
||||
lhs_(4,4,3) = tmp22 * u_(2,2) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dx4
|
||||
lhs_(4,5,3) = 0.
|
||||
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
|
||||
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * (u_(2,5) * tm1)) * (u
|
||||
&_(2,2) * tm1)) - tmp11 * ((-(con43 * c3c4 - c1345)) * tm3 * u_(2,2
|
||||
&)** 2 - (c3c4 - c1345) * tm3 * u_(2,3)** 2 - (c3c4 - c1345) * tm3
|
||||
&* u_(2,4)** 2 - c1345 * tm2 * u_(2,5))
|
||||
lhs_(5,2,3) = tmp22 * (c1 * u_(2,5) * tm1 - 0.50d+00 * c2
|
||||
& * (3.0d+00 * u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u_
|
||||
&(2,4)) * tm2) - tmp11 * ((con43 * c3c4 - c1345) * tm2 * u_(2,2))
|
||||
lhs_(5,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,2)) * tm
|
||||
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,3)
|
||||
lhs_(5,4,3) = tmp22 * ((-(c2)) * (u_(2,4) * u_(2,2)) * tm
|
||||
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,4)
|
||||
lhs_(5,5,3) = tmp22 * c1 * (u_(2,2) * tm1) - tmp11 * c134
|
||||
&5 * tm1 - tmp11 * dx5
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i - 1
|
||||
&,j,k) - lhs_(i__0,2,1) * rhs(2,i - 1,j,k) - lhs_(i__0,3,1) * rhs(3
|
||||
&,i - 1,j,k) - lhs_(i__0,4,1) * rhs(4,i - 1,j,k) - lhs_(i__0,5,1) *
|
||||
& rhs(5,i - 1,j,k)
|
||||
enddo
|
||||
pivot = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot
|
||||
lhs_(1,1,3) = lhs_(1,1,3) * pivot
|
||||
lhs_(1,2,3) = lhs_(1,2,3) * pivot
|
||||
lhs_(1,3,3) = lhs_(1,3,3) * pivot
|
||||
lhs_(1,4,3) = lhs_(1,4,3) * pivot
|
||||
lhs_(1,5,3) = lhs_(1,5,3) * pivot
|
||||
rhs_(1) = rhs_(1) * pivot
|
||||
coeff = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(1)
|
||||
coeff = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(1)
|
||||
coeff = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(1)
|
||||
coeff = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(1)
|
||||
pivot = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot
|
||||
lhs_(2,1,3) = lhs_(2,1,3) * pivot
|
||||
lhs_(2,2,3) = lhs_(2,2,3) * pivot
|
||||
lhs_(2,3,3) = lhs_(2,3,3) * pivot
|
||||
lhs_(2,4,3) = lhs_(2,4,3) * pivot
|
||||
lhs_(2,5,3) = lhs_(2,5,3) * pivot
|
||||
rhs_(2) = rhs_(2) * pivot
|
||||
coeff = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(2)
|
||||
coeff = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(2)
|
||||
coeff = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(2)
|
||||
coeff = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(2)
|
||||
pivot = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot
|
||||
lhs_(3,1,3) = lhs_(3,1,3) * pivot
|
||||
lhs_(3,2,3) = lhs_(3,2,3) * pivot
|
||||
lhs_(3,3,3) = lhs_(3,3,3) * pivot
|
||||
lhs_(3,4,3) = lhs_(3,4,3) * pivot
|
||||
lhs_(3,5,3) = lhs_(3,5,3) * pivot
|
||||
rhs_(3) = rhs_(3) * pivot
|
||||
coeff = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(3)
|
||||
coeff = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(3)
|
||||
coeff = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(3)
|
||||
coeff = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(3)
|
||||
pivot = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot
|
||||
lhs_(4,1,3) = lhs_(4,1,3) * pivot
|
||||
lhs_(4,2,3) = lhs_(4,2,3) * pivot
|
||||
lhs_(4,3,3) = lhs_(4,3,3) * pivot
|
||||
lhs_(4,4,3) = lhs_(4,4,3) * pivot
|
||||
lhs_(4,5,3) = lhs_(4,5,3) * pivot
|
||||
rhs_(4) = rhs_(4) * pivot
|
||||
coeff = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(4)
|
||||
coeff = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(4)
|
||||
coeff = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(4)
|
||||
coeff = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(4)
|
||||
pivot = 1.00d0 / lhs_(5,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) * pivot
|
||||
lhs_(5,2,3) = lhs_(5,2,3) * pivot
|
||||
lhs_(5,3,3) = lhs_(5,3,3) * pivot
|
||||
lhs_(5,4,3) = lhs_(5,4,3) * pivot
|
||||
lhs_(5,5,3) = lhs_(5,5,3) * pivot
|
||||
rhs_(5) = rhs_(5) * pivot
|
||||
coeff = lhs_(1,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(5)
|
||||
coeff = lhs_(2,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(5)
|
||||
coeff = lhs_(3,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(5)
|
||||
coeff = lhs_(4,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(5)
|
||||
do i__0 = 1,5
|
||||
lhs__(i__0,1,i,j,k1) = lhs_(i__0,1,3)
|
||||
lhs__(i__0,2,i,j,k1) = lhs_(i__0,2,3)
|
||||
lhs__(i__0,3,i,j,k1) = lhs_(i__0,3,3)
|
||||
lhs__(i__0,4,i,j,k1) = lhs_(i__0,4,3)
|
||||
lhs__(i__0,5,i,j,k1) = lhs_(i__0,5,3)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
u_(0,m) = u_(1,m)
|
||||
u_(1,m) = u_(2,m)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
! else ! ******************* else case *************************
|
||||
do n = 1,5
|
||||
lhs_(1,n,1) = 0.0d0
|
||||
lhs_(1,n,2) = 0.0d0
|
||||
lhs_(1,n,3) = 0.0d0
|
||||
lhs_(2,n,1) = 0.0d0
|
||||
lhs_(2,n,2) = 0.0d0
|
||||
lhs_(2,n,3) = 0.0d0
|
||||
lhs_(3,n,1) = 0.0d0
|
||||
lhs_(3,n,2) = 0.0d0
|
||||
lhs_(3,n,3) = 0.0d0
|
||||
lhs_(4,n,1) = 0.0d0
|
||||
lhs_(4,n,2) = 0.0d0
|
||||
lhs_(4,n,3) = 0.0d0
|
||||
lhs_(5,n,1) = 0.0d0
|
||||
lhs_(5,n,2) = 0.0d0
|
||||
lhs_(5,n,3) = 0.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
lhs_(m,m,2) = 1.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i - 1,j,
|
||||
&k) - lhs_(i__0,2,1) * rhs(2,i - 1,j,k) - lhs_(i__0,3,1) * rhs(3,i
|
||||
&- 1,j,k) - lhs_(i__0,4,1) * rhs(4,i - 1,j,k) - lhs_(i__0,5,1) * rh
|
||||
&s(5,i - 1,j,k)
|
||||
enddo
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
pivot__3 = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
|
||||
rhs_(1) = rhs_(1) * pivot__3
|
||||
coeff__2 = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
|
||||
pivot__3 = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
|
||||
rhs_(2) = rhs_(2) * pivot__3
|
||||
coeff__2 = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
|
||||
pivot__3 = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
|
||||
rhs_(3) = rhs_(3) * pivot__3
|
||||
coeff__2 = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
|
||||
pivot__3 = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
|
||||
rhs_(4) = rhs_(4) * pivot__3
|
||||
coeff__2 = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
|
||||
pivot__3 = 1.00d0 / lhs_(5,5,2)
|
||||
rhs_(5) = rhs_(5) * pivot__3
|
||||
coeff__2 = lhs_(1,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(2,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(3,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(4,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
|
||||
|
||||
! endif
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
|
||||
! enddo
|
||||
do i = problem_size - 2,0,(-(1))
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k1) * rhs(1,i + 1,j,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k1) * rhs(2,i + 1,j,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k1) * rhs(3,i + 1,j,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k1) * rhs(4,i + 1,j,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k1) * rhs(5,i + 1,j,k)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,640 @@
|
||||
!---------------------------------------------------------------------
|
||||
! performs guaussian elimination on this cell.
|
||||
!
|
||||
! assumes that unpacking routines for non-first cells
|
||||
! preload C' and rhs' from previous cell.
|
||||
!
|
||||
! assumed send happens outside this routine, but that
|
||||
! c'(IMAX) and rhs'(IMAX) will be sent to next cell
|
||||
!---------------------------------------------------------------------
|
||||
subroutine x_solve ()
|
||||
|
||||
include 'header3d.h'
|
||||
double precision pivot,coeff
|
||||
integer i__0,j__1
|
||||
integer m,n
|
||||
double precision coeff__2
|
||||
double precision pivot__3
|
||||
double precision lhs_(5,5,3),u_(0:3,5)
|
||||
double precision rhs_(5),rhsp_(5)
|
||||
integer i,j,k,isize
|
||||
isize = problem_size - 1
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! outer most do loops - sweeping in i direction
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! begin inner most do loop
|
||||
! do all the elements of the cell unless last
|
||||
!---------------------------------------------------------------------
|
||||
|
||||
!DVM$ region local(lhs__)
|
||||
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k),private(u_,rhs_,tmp1,tmp2,
|
||||
!DVM$&tmp3,t1,t2,t3,tm1,tm2,pivot,coeff, tm3,i__0,j__1,tmp11,
|
||||
!DVM$&tmp22,lhs_,m,n,coeff__2, pivot__3),stage(stage_n),
|
||||
!DVM$& ACROSS(rhs(0:0,1:0,0:0,0:0),lhs__(0:0,0:0,1:0,0:0,0:0))
|
||||
do k = 1,problem_size - 2
|
||||
do j = 1,problem_size - 2
|
||||
do i = 1,isize - 1
|
||||
if(i .ne. isize) then
|
||||
do m = 1,5
|
||||
lhs_(m,1,3) = lhs__(m,1,i-1,j,k)
|
||||
lhs_(m,2,3) = lhs__(m,2,i-1,j,k)
|
||||
lhs_(m,3,3) = lhs__(m,3,i-1,j,k)
|
||||
lhs_(m,4,3) = lhs__(m,4,i-1,j,k)
|
||||
lhs_(m,5,3) = lhs__(m,5,i-1,j,k)
|
||||
|
||||
u_(0,m) = u(m,i-1,j,k)
|
||||
u_(1,m) = u(m,i,j,k)
|
||||
u_(2,m) = u(m,i+1,j,k)
|
||||
enddo
|
||||
|
||||
tmp1 = 1.0d+00 / u_(1,1)
|
||||
tmp2 = tmp1 * tmp1
|
||||
tmp3 = tmp1 * tmp2
|
||||
t1 = 1.0d+00 / u_(0,1)
|
||||
t2 = t1 * t1
|
||||
t3 = t1 * t2
|
||||
tm1 = 1.0d+00 / u_(2,1)
|
||||
tm2 = tm1 * tm1
|
||||
tm3 = tm1 * tm2
|
||||
tmp11 = dt * tx1
|
||||
tmp22 = dt * tx2
|
||||
lhs_(1,1,1) = (-(tmp11)) * dx1
|
||||
lhs_(1,2,1) = (-(tmp22))
|
||||
lhs_(1,3,1) = 0.
|
||||
lhs_(1,4,1) = 0.
|
||||
lhs_(1,5,1) = 0.
|
||||
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * t2 * u_(0,2))) +
|
||||
& c2 * 0.50d+00 * (u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
|
||||
&* u_(0,4)) * t2) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,2))
|
||||
lhs_(2,2,1) = (-(tmp22)) * ((2.0d+00 - c2) * (u_(0,2) / u
|
||||
&_(0,1))) - tmp11 * con43 * c3c4 * t1 - tmp11 * dx2
|
||||
lhs_(2,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * t1))
|
||||
lhs_(2,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,4) * t1))
|
||||
lhs_(2,5,1) = (-(tmp22)) * c2
|
||||
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,3))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,3))
|
||||
lhs_(3,2,1) = (-(tmp22)) * u_(0,3) * t1
|
||||
lhs_(3,3,1) = (-(tmp22)) * u_(0,2) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dx3
|
||||
lhs_(3,4,1) = 0.
|
||||
lhs_(3,5,1) = 0.
|
||||
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,4))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,4))
|
||||
lhs_(4,2,1) = (-(tmp22)) * u_(0,4) * t1
|
||||
lhs_(4,3,1) = 0.
|
||||
lhs_(4,4,1) = (-(tmp22)) * u_(0,2) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dx4
|
||||
lhs_(4,5,1) = 0.
|
||||
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
|
||||
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * (u_(0,5) * t1)) *
|
||||
& (u_(0,2) * t1)) - tmp11 * ((-(con43 * c3c4 - c1345)) * t3 * u_(0,
|
||||
&2)** 2 - (c3c4 - c1345) * t3 * u_(0,3)** 2 - (c3c4 - c1345) * t3 *
|
||||
& u_(0,4)** 2 - c1345 * t2 * u_(0,5))
|
||||
lhs_(5,2,1) = (-(tmp22)) * (c1 * u_(0,5) * t1 - 0.50d+00
|
||||
&* c2 * (3.0d+00 * u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
|
||||
&* u_(0,4)) * t2) - tmp11 * ((con43 * c3c4 - c1345) * t2 * u_(0,2))
|
||||
lhs_(5,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,2))
|
||||
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,3)
|
||||
lhs_(5,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,4) * u_(0,2))
|
||||
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,4)
|
||||
lhs_(5,5,1) = (-(tmp22)) * c1 * (u_(0,2) * t1) - tmp11 *
|
||||
&c1345 * t1 - tmp11 * dx5
|
||||
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dx1
|
||||
lhs_(1,2,2) = 0.
|
||||
lhs_(1,3,2) = 0.
|
||||
lhs_(1,4,2) = 0.
|
||||
lhs_(1,5,2) = 0.
|
||||
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
|
||||
& * u_(1,2))
|
||||
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3c4 *
|
||||
&tmp1 + tmp11 * 2.0d+00 * dx2
|
||||
lhs_(2,3,2) = 0.
|
||||
lhs_(2,4,2) = 0.
|
||||
lhs_(2,5,2) = 0.
|
||||
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&3))
|
||||
lhs_(3,2,2) = 0.
|
||||
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dx3
|
||||
lhs_(3,4,2) = 0.
|
||||
lhs_(3,5,2) = 0.
|
||||
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&4))
|
||||
lhs_(4,2,2) = 0.
|
||||
lhs_(4,3,2) = 0.
|
||||
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dx4
|
||||
lhs_(4,5,2) = tmp11 * 2.0d+00 * 0
|
||||
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(con43 * c3c4 - c1345)
|
||||
&) * tmp3 * u_(1,2)** 2 - (c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (c3
|
||||
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
|
||||
lhs_(5,2,2) = tmp11 * 2.0d+00 * ((con43 * c3c4 - c1345) *
|
||||
& tmp2 * u_(1,2))
|
||||
lhs_(5,3,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,3)
|
||||
lhs_(5,4,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,4)
|
||||
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
|
||||
&tmp11 * 2.0d+00 * dx5
|
||||
if (i .ne. 1) then
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
enddo
|
||||
endif
|
||||
lhs_(1,1,3) = (-(tmp11)) * dx1
|
||||
lhs_(1,2,3) = tmp22
|
||||
lhs_(1,3,3) = 0.
|
||||
lhs_(1,4,3) = 0.
|
||||
lhs_(1,5,3) = 0.
|
||||
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * tm2 * u_(2,2))) + c2
|
||||
&* 0.50d+00 * (u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u_
|
||||
&(2,4)) * tm2) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,2))
|
||||
lhs_(2,2,3) = tmp22 * ((2.0d+00 - c2) * (u_(2,2) / u_(2,1
|
||||
&))) - tmp11 * con43 * c3c4 * tm1 - tmp11 * dx2
|
||||
lhs_(2,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * tm1))
|
||||
lhs_(2,4,3) = tmp22 * ((-(c2)) * (u_(2,4) * tm1))
|
||||
lhs_(2,5,3) = tmp22 * c2
|
||||
lhs_(3,1,3) = tmp22 * ((-(u_(2,2) * u_(2,3))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,3))
|
||||
lhs_(3,2,3) = tmp22 * u_(2,3) * tm1
|
||||
lhs_(3,3,3) = tmp22 * u_(2,2) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dx3
|
||||
lhs_(3,4,3) = 0.
|
||||
lhs_(3,5,3) = 0.
|
||||
lhs_(4,1,3) = tmp22 * ((-(u_(2,2) * u_(2,4))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,4))
|
||||
lhs_(4,2,3) = tmp22 * u_(2,4) * tm1
|
||||
lhs_(4,3,3) = 0.
|
||||
lhs_(4,4,3) = tmp22 * u_(2,2) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dx4
|
||||
lhs_(4,5,3) = 0.
|
||||
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
|
||||
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * (u_(2,5) * tm1)) * (u
|
||||
&_(2,2) * tm1)) - tmp11 * ((-(con43 * c3c4 - c1345)) * tm3 * u_(2,2
|
||||
&)** 2 - (c3c4 - c1345) * tm3 * u_(2,3)** 2 - (c3c4 - c1345) * tm3
|
||||
&* u_(2,4)** 2 - c1345 * tm2 * u_(2,5))
|
||||
lhs_(5,2,3) = tmp22 * (c1 * u_(2,5) * tm1 - 0.50d+00 * c2
|
||||
& * (3.0d+00 * u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u_
|
||||
&(2,4)) * tm2) - tmp11 * ((con43 * c3c4 - c1345) * tm2 * u_(2,2))
|
||||
lhs_(5,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,2)) * tm
|
||||
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,3)
|
||||
lhs_(5,4,3) = tmp22 * ((-(c2)) * (u_(2,4) * u_(2,2)) * tm
|
||||
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,4)
|
||||
lhs_(5,5,3) = tmp22 * c1 * (u_(2,2) * tm1) - tmp11 * c134
|
||||
&5 * tm1 - tmp11 * dx5
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i - 1
|
||||
&,j,k) - lhs_(i__0,2,1) * rhs(2,i - 1,j,k) - lhs_(i__0,3,1) * rhs(3
|
||||
&,i - 1,j,k) - lhs_(i__0,4,1) * rhs(4,i - 1,j,k) - lhs_(i__0,5,1) *
|
||||
& rhs(5,i - 1,j,k)
|
||||
enddo
|
||||
pivot = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot
|
||||
lhs_(1,1,3) = lhs_(1,1,3) * pivot
|
||||
lhs_(1,2,3) = lhs_(1,2,3) * pivot
|
||||
lhs_(1,3,3) = lhs_(1,3,3) * pivot
|
||||
lhs_(1,4,3) = lhs_(1,4,3) * pivot
|
||||
lhs_(1,5,3) = lhs_(1,5,3) * pivot
|
||||
rhs_(1) = rhs_(1) * pivot
|
||||
coeff = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(1)
|
||||
coeff = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(1)
|
||||
coeff = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(1)
|
||||
coeff = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(1)
|
||||
pivot = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot
|
||||
lhs_(2,1,3) = lhs_(2,1,3) * pivot
|
||||
lhs_(2,2,3) = lhs_(2,2,3) * pivot
|
||||
lhs_(2,3,3) = lhs_(2,3,3) * pivot
|
||||
lhs_(2,4,3) = lhs_(2,4,3) * pivot
|
||||
lhs_(2,5,3) = lhs_(2,5,3) * pivot
|
||||
rhs_(2) = rhs_(2) * pivot
|
||||
coeff = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(2)
|
||||
coeff = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(2)
|
||||
coeff = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(2)
|
||||
coeff = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(2)
|
||||
pivot = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot
|
||||
lhs_(3,1,3) = lhs_(3,1,3) * pivot
|
||||
lhs_(3,2,3) = lhs_(3,2,3) * pivot
|
||||
lhs_(3,3,3) = lhs_(3,3,3) * pivot
|
||||
lhs_(3,4,3) = lhs_(3,4,3) * pivot
|
||||
lhs_(3,5,3) = lhs_(3,5,3) * pivot
|
||||
rhs_(3) = rhs_(3) * pivot
|
||||
coeff = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(3)
|
||||
coeff = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(3)
|
||||
coeff = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(3)
|
||||
coeff = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(3)
|
||||
pivot = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot
|
||||
lhs_(4,1,3) = lhs_(4,1,3) * pivot
|
||||
lhs_(4,2,3) = lhs_(4,2,3) * pivot
|
||||
lhs_(4,3,3) = lhs_(4,3,3) * pivot
|
||||
lhs_(4,4,3) = lhs_(4,4,3) * pivot
|
||||
lhs_(4,5,3) = lhs_(4,5,3) * pivot
|
||||
rhs_(4) = rhs_(4) * pivot
|
||||
coeff = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(4)
|
||||
coeff = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(4)
|
||||
coeff = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(4)
|
||||
coeff = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(4)
|
||||
pivot = 1.00d0 / lhs_(5,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) * pivot
|
||||
lhs_(5,2,3) = lhs_(5,2,3) * pivot
|
||||
lhs_(5,3,3) = lhs_(5,3,3) * pivot
|
||||
lhs_(5,4,3) = lhs_(5,4,3) * pivot
|
||||
lhs_(5,5,3) = lhs_(5,5,3) * pivot
|
||||
rhs_(5) = rhs_(5) * pivot
|
||||
coeff = lhs_(1,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(5)
|
||||
coeff = lhs_(2,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(5)
|
||||
coeff = lhs_(3,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(5)
|
||||
coeff = lhs_(4,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(5)
|
||||
do i__0 = 1,5
|
||||
lhs__(i__0,1,i,j,k) = lhs_(i__0,1,3)
|
||||
lhs__(i__0,2,i,j,k) = lhs_(i__0,2,3)
|
||||
lhs__(i__0,3,i,j,k) = lhs_(i__0,3,3)
|
||||
lhs__(i__0,4,i,j,k) = lhs_(i__0,4,3)
|
||||
lhs__(i__0,5,i,j,k) = lhs_(i__0,5,3)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
! enddo
|
||||
|
||||
else ! ******************* else case *************************
|
||||
do n = 1,5
|
||||
lhs_(1,n,1) = 0.0d0
|
||||
lhs_(1,n,2) = 0.0d0
|
||||
lhs_(1,n,3) = 0.0d0
|
||||
lhs_(2,n,1) = 0.0d0
|
||||
lhs_(2,n,2) = 0.0d0
|
||||
lhs_(2,n,3) = 0.0d0
|
||||
lhs_(3,n,1) = 0.0d0
|
||||
lhs_(3,n,2) = 0.0d0
|
||||
lhs_(3,n,3) = 0.0d0
|
||||
lhs_(4,n,1) = 0.0d0
|
||||
lhs_(4,n,2) = 0.0d0
|
||||
lhs_(4,n,3) = 0.0d0
|
||||
lhs_(5,n,1) = 0.0d0
|
||||
lhs_(5,n,2) = 0.0d0
|
||||
lhs_(5,n,3) = 0.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
lhs_(m,m,2) = 1.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i - 1,j,
|
||||
&k) - lhs_(i__0,2,1) * rhs(2,i - 1,j,k) - lhs_(i__0,3,1) * rhs(3,i
|
||||
&- 1,j,k) - lhs_(i__0,4,1) * rhs(4,i - 1,j,k) - lhs_(i__0,5,1) * rh
|
||||
&s(5,i - 1,j,k)
|
||||
enddo
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
pivot__3 = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
|
||||
rhs_(1) = rhs_(1) * pivot__3
|
||||
coeff__2 = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
|
||||
pivot__3 = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
|
||||
rhs_(2) = rhs_(2) * pivot__3
|
||||
coeff__2 = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
|
||||
pivot__3 = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
|
||||
rhs_(3) = rhs_(3) * pivot__3
|
||||
coeff__2 = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
|
||||
pivot__3 = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
|
||||
rhs_(4) = rhs_(4) * pivot__3
|
||||
coeff__2 = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
|
||||
pivot__3 = 1.00d0 / lhs_(5,5,2)
|
||||
rhs_(5) = rhs_(5) * pivot__3
|
||||
coeff__2 = lhs_(1,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(2,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(3,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(4,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
|
||||
|
||||
endif
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
! enddo
|
||||
|
||||
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k),private(m,rhsp_,rhs_),
|
||||
!DVM$& ACROSS(rhs(0:0,0:1,0:0,0:0)),stage(stage_n)
|
||||
do k = 1,problem_size - 2
|
||||
do j = 1,problem_size - 2
|
||||
do i = problem_size - 2,0,(-(1))
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
rhsp_(m) = rhs(m,i+1,j,k)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k) * rhsp_(1)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k) * rhsp_(2)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k) * rhsp_(3)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k) * rhsp_(4)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k) * rhsp_(5)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,622 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! performs guaussian elimination on this cell.
|
||||
!
|
||||
! assumes that unpacking routines for non-first cells
|
||||
! preload C' and rhs' from previous cell.
|
||||
!
|
||||
! assumed send happens outside this routine, but that
|
||||
! c'(JMAX) and rhs'(JMAX) will be sent to next cell
|
||||
!---------------------------------------------------------------------
|
||||
subroutine y_solve ()
|
||||
|
||||
include 'header3d.h'
|
||||
double precision coeff
|
||||
double precision pivot
|
||||
integer i__0
|
||||
integer j__1,m,n
|
||||
double precision coeff__2
|
||||
double precision pivot__3
|
||||
double precision lhs_(5,5,3),rhs_(5),u_(0:3,5)
|
||||
integer i,j,k,jsize,jstart
|
||||
jstart = 0
|
||||
jsize = problem_size - 1
|
||||
|
||||
!ACROSS (rhs(0:0,1:0,0:0,0:0),lhs__(0:0,1:0,0:0,0:0,0:0))
|
||||
!DVM$ region local(lhs__)
|
||||
!DVM$ PARALLEL (k,i) ON rhs(*,i,*,k), private(u_,j,rhs_,pivot,
|
||||
!DVM$&coeff,tmp1, tmp2, tmp3, t1, t2, t3, tm1, tm2, tm3, tmp11,
|
||||
!DVM$&tmp22, i__0, j__1,lhs_,coeff__2,pivot__3,m,n)
|
||||
do k = 1,problem_size - 2
|
||||
do i = 1,problem_size - 2
|
||||
do m = 1,5
|
||||
u_(0,m) = u(m,i,0,k)
|
||||
u_(1,m) = u(m,i,1,k)
|
||||
enddo
|
||||
do j = 1,jsize - 1
|
||||
do m = 1,5
|
||||
u_(2,m) = u(m,i,j + 1,k)
|
||||
enddo
|
||||
|
||||
! if(j .ne. jsize) then
|
||||
tmp1 = 1.0d+00 / u_(1,1)
|
||||
tmp2 = tmp1 * tmp1
|
||||
tmp3 = tmp1 * tmp2
|
||||
t1 = 1.0d+00 / u_(0,1)
|
||||
t2 = t1 * t1
|
||||
t3 = t1 * t2
|
||||
tm1 = 1.0d+00 / u_(2,1)
|
||||
tm2 = tm1 * tm1
|
||||
tm3 = tm1 * tm2
|
||||
tmp11 = dt * ty1
|
||||
tmp22 = dt * ty2
|
||||
lhs_(1,1,1) = (-(tmp11)) * dy1
|
||||
lhs_(1,2,1) = 0.
|
||||
lhs_(1,3,1) = (-(tmp22))
|
||||
lhs_(1,4,1) = 0.
|
||||
lhs_(1,5,1) = 0.
|
||||
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,3))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,2))
|
||||
lhs_(2,2,1) = (-(tmp22)) * u_(0,3) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dy2
|
||||
lhs_(2,3,1) = (-(tmp22)) * u_(0,2) * t1
|
||||
lhs_(2,4,1) = 0.
|
||||
lhs_(2,5,1) = 0.
|
||||
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,3) * t2)) +
|
||||
& 0.50d+00 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
|
||||
& * u_(0,4)) * t2)) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,3))
|
||||
lhs_(3,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * t1)
|
||||
lhs_(3,3,1) = (-(tmp22)) * ((2.0d+00 - c2) * u_(0,3) * t1
|
||||
&) - tmp11 * con43 * c3c4 * t1 - tmp11 * dy3
|
||||
lhs_(3,4,1) = (-(tmp22)) * ((-(c2)) * u_(0,4) * t1)
|
||||
lhs_(3,5,1) = (-(tmp22)) * c2
|
||||
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,4))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,4))
|
||||
lhs_(4,2,1) = 0.
|
||||
lhs_(4,3,1) = (-(tmp22)) * u_(0,4) * t1
|
||||
lhs_(4,4,1) = (-(tmp22)) * u_(0,3) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dy4
|
||||
lhs_(4,5,1) = 0.
|
||||
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
|
||||
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * u_(0,5) * t1) * u
|
||||
&_(0,3) * t1) - tmp11 * ((-(c3c4 - c1345)) * t3 * u_(0,2)** 2 - (co
|
||||
&n43 * c3c4 - c1345) * t3 * u_(0,3)** 2 - (c3c4 - c1345) * t3 * u_(
|
||||
&0,4)** 2 - c1345 * t2 * u_(0,5))
|
||||
lhs_(5,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * u_(0,3) *
|
||||
& t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,2)
|
||||
lhs_(5,3,1) = (-(tmp22)) * (c1 * u_(0,5) * t1 - 0.50d+00
|
||||
&* c2 * ((u_(0,2) * u_(0,2) + 3.0d+00 * u_(0,3) * u_(0,3) + u_(0,4)
|
||||
& * u_(0,4)) * t2)) - tmp11 * (con43 * c3c4 - c1345) * t2 * u_(0,3)
|
||||
lhs_(5,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,4))
|
||||
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,4)
|
||||
lhs_(5,5,1) = (-(tmp22)) * c1 * u_(0,3) * t1 - tmp11 * c1
|
||||
&345 * t1 - tmp11 * dy5
|
||||
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dy1
|
||||
lhs_(1,2,2) = 0.
|
||||
lhs_(1,3,2) = 0.
|
||||
lhs_(1,4,2) = 0.
|
||||
lhs_(1,5,2) = 0.
|
||||
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&2))
|
||||
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dy2
|
||||
lhs_(2,3,2) = 0.
|
||||
lhs_(2,4,2) = 0.
|
||||
lhs_(2,5,2) = 0.
|
||||
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
|
||||
& * u_(1,3))
|
||||
lhs_(3,2,2) = 0.
|
||||
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3c4 *
|
||||
&tmp1 + tmp11 * 2.0d+00 * dy3
|
||||
lhs_(3,4,2) = 0.
|
||||
lhs_(3,5,2) = 0.
|
||||
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&4))
|
||||
lhs_(4,2,2) = 0.
|
||||
lhs_(4,3,2) = 0.
|
||||
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dy4
|
||||
lhs_(4,5,2) = 0.
|
||||
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(c3c4 - c1345)) * tmp3
|
||||
& * u_(1,2)** 2 - (con43 * c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (c3
|
||||
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
|
||||
lhs_(5,2,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,2)
|
||||
lhs_(5,3,2) = tmp11 * 2.0d+00 * (con43 * c3c4 - c1345) *
|
||||
&tmp2 * u_(1,3)
|
||||
lhs_(5,4,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,4)
|
||||
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
|
||||
&tmp11 * 2.0d+00 * dy5
|
||||
if (j .ne. 1) then
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
enddo
|
||||
endif
|
||||
lhs_(1,1,3) = (-(tmp11)) * dy1
|
||||
lhs_(1,2,3) = 0.
|
||||
lhs_(1,3,3) = tmp22
|
||||
lhs_(1,4,3) = 0.
|
||||
lhs_(1,5,3) = 0.
|
||||
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * u_(2,3))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,2))
|
||||
lhs_(2,2,3) = tmp22 * u_(2,3) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dy2
|
||||
lhs_(2,3,3) = tmp22 * u_(2,2) * tm1
|
||||
lhs_(2,4,3) = 0.
|
||||
lhs_(2,5,3) = 0.
|
||||
lhs_(3,1,3) = tmp22 * ((-(u_(2,3) * u_(2,3) * tm2)) + 0.5
|
||||
&0d+00 * c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u
|
||||
&_(2,4)) * tm2)) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,3))
|
||||
lhs_(3,2,3) = tmp22 * ((-(c2)) * u_(2,2) * tm1)
|
||||
lhs_(3,3,3) = tmp22 * ((2.0d+00 - c2) * u_(2,3) * tm1) -
|
||||
&tmp11 * con43 * c3c4 * tm1 - tmp11 * dy3
|
||||
lhs_(3,4,3) = tmp22 * ((-(c2)) * u_(2,4) * tm1)
|
||||
lhs_(3,5,3) = tmp22 * c2
|
||||
lhs_(4,1,3) = tmp22 * ((-(u_(2,3) * u_(2,4))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,4))
|
||||
lhs_(4,2,3) = 0.
|
||||
lhs_(4,3,3) = tmp22 * u_(2,4) * tm1
|
||||
lhs_(4,4,3) = tmp22 * u_(2,3) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dy4
|
||||
lhs_(4,5,3) = 0.
|
||||
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
|
||||
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * u_(2,5) * tm1) * u_(2
|
||||
&,3) * tm1) - tmp11 * ((-(c3c4 - c1345)) * tm3 * u_(2,2)** 2 - (con
|
||||
&43 * c3c4 - c1345) * tm3 * u_(2,3)** 2 - (c3c4 - c1345) * tm3 * u_
|
||||
&(2,4)** 2 - c1345 * tm2 * u_(2,5))
|
||||
lhs_(5,2,3) = tmp22 * ((-(c2)) * u_(2,2) * u_(2,3) * tm2)
|
||||
& - tmp11 * (c3c4 - c1345) * tm2 * u_(2,2)
|
||||
lhs_(5,3,3) = tmp22 * (c1 * u_(2,5) * tm1 - 0.50d+00 * c2
|
||||
& * ((u_(2,2) * u_(2,2) + 3.0d+00 * u_(2,3) * u_(2,3) + u_(2,4) * u
|
||||
&_(2,4)) * tm2)) - tmp11 * (con43 * c3c4 - c1345) * tm2 * u_(2,3)
|
||||
lhs_(5,4,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,4)) * tm
|
||||
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,4)
|
||||
lhs_(5,5,3) = tmp22 * c1 * u_(2,3) * tm1 - tmp11 * c1345
|
||||
&* tm1 - tmp11 * dy5
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j -
|
||||
& 1,k) - lhs_(i__0,2,1) * rhs(2,i,j - 1,k) - lhs_(i__0,3,1) * rhs(3
|
||||
&,i,j - 1,k) - lhs_(i__0,4,1) * rhs(4,i,j - 1,k) - lhs_(i__0,5,1) *
|
||||
& rhs(5,i,j - 1,k)
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
pivot = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot
|
||||
lhs_(1,1,3) = lhs_(1,1,3) * pivot
|
||||
lhs_(1,2,3) = lhs_(1,2,3) * pivot
|
||||
lhs_(1,3,3) = lhs_(1,3,3) * pivot
|
||||
lhs_(1,4,3) = lhs_(1,4,3) * pivot
|
||||
lhs_(1,5,3) = lhs_(1,5,3) * pivot
|
||||
rhs_(1) = rhs_(1) * pivot
|
||||
coeff = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(1)
|
||||
coeff = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(1)
|
||||
coeff = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(1)
|
||||
coeff = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(1)
|
||||
pivot = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot
|
||||
lhs_(2,1,3) = lhs_(2,1,3) * pivot
|
||||
lhs_(2,2,3) = lhs_(2,2,3) * pivot
|
||||
lhs_(2,3,3) = lhs_(2,3,3) * pivot
|
||||
lhs_(2,4,3) = lhs_(2,4,3) * pivot
|
||||
lhs_(2,5,3) = lhs_(2,5,3) * pivot
|
||||
rhs_(2) = rhs_(2) * pivot
|
||||
coeff = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(2)
|
||||
coeff = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(2)
|
||||
coeff = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(2)
|
||||
coeff = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(2)
|
||||
pivot = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot
|
||||
lhs_(3,1,3) = lhs_(3,1,3) * pivot
|
||||
lhs_(3,2,3) = lhs_(3,2,3) * pivot
|
||||
lhs_(3,3,3) = lhs_(3,3,3) * pivot
|
||||
lhs_(3,4,3) = lhs_(3,4,3) * pivot
|
||||
lhs_(3,5,3) = lhs_(3,5,3) * pivot
|
||||
rhs_(3) = rhs_(3) * pivot
|
||||
coeff = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(3)
|
||||
coeff = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(3)
|
||||
coeff = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(3)
|
||||
coeff = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(3)
|
||||
pivot = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot
|
||||
lhs_(4,1,3) = lhs_(4,1,3) * pivot
|
||||
lhs_(4,2,3) = lhs_(4,2,3) * pivot
|
||||
lhs_(4,3,3) = lhs_(4,3,3) * pivot
|
||||
lhs_(4,4,3) = lhs_(4,4,3) * pivot
|
||||
lhs_(4,5,3) = lhs_(4,5,3) * pivot
|
||||
rhs_(4) = rhs_(4) * pivot
|
||||
coeff = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(4)
|
||||
coeff = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(4)
|
||||
coeff = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(4)
|
||||
coeff = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(4)
|
||||
pivot = 1.00d0 / lhs_(5,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) * pivot
|
||||
lhs_(5,2,3) = lhs_(5,2,3) * pivot
|
||||
lhs_(5,3,3) = lhs_(5,3,3) * pivot
|
||||
lhs_(5,4,3) = lhs_(5,4,3) * pivot
|
||||
lhs_(5,5,3) = lhs_(5,5,3) * pivot
|
||||
rhs_(5) = rhs_(5) * pivot
|
||||
coeff = lhs_(1,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(5)
|
||||
coeff = lhs_(2,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(5)
|
||||
coeff = lhs_(3,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(5)
|
||||
coeff = lhs_(4,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(5)
|
||||
do i__0 = 1,5
|
||||
lhs__(i__0,1,i,j,k) = lhs_(i__0,1,3)
|
||||
lhs__(i__0,2,i,j,k) = lhs_(i__0,2,3)
|
||||
lhs__(i__0,3,i,j,k) = lhs_(i__0,3,3)
|
||||
lhs__(i__0,4,i,j,k) = lhs_(i__0,4,3)
|
||||
lhs__(i__0,5,i,j,k) = lhs_(i__0,5,3)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
u_(0,m) = u_(1,m)
|
||||
u_(1,m) = u_(2,m)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
! else ! ******************* else case *************************
|
||||
do n = 1,5
|
||||
lhs_(1,n,1) = 0.0d0
|
||||
lhs_(1,n,2) = 0.0d0
|
||||
lhs_(1,n,3) = 0.0d0
|
||||
lhs_(2,n,1) = 0.0d0
|
||||
lhs_(2,n,2) = 0.0d0
|
||||
lhs_(2,n,3) = 0.0d0
|
||||
lhs_(3,n,1) = 0.0d0
|
||||
lhs_(3,n,2) = 0.0d0
|
||||
lhs_(3,n,3) = 0.0d0
|
||||
lhs_(4,n,1) = 0.0d0
|
||||
lhs_(4,n,2) = 0.0d0
|
||||
lhs_(4,n,3) = 0.0d0
|
||||
lhs_(5,n,1) = 0.0d0
|
||||
lhs_(5,n,2) = 0.0d0
|
||||
lhs_(5,n,3) = 0.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
lhs_(m,m,2) = 1.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,jsize
|
||||
&- 1,k) - lhs_(i__0,2,1) * rhs(2,i,jsize - 1,k) - lhs_(i__0,3,1) *
|
||||
&rhs(3,i,jsize - 1,k) - lhs_(i__0,4,1) * rhs(4,i,jsize - 1,k) - lhs
|
||||
&_(i__0,5,1) * rhs(5,i,jsize - 1,k)
|
||||
enddo
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
|
||||
enddo
|
||||
pivot__3 = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
|
||||
rhs_(1) = rhs_(1) * pivot__3
|
||||
coeff__2 = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
|
||||
pivot__3 = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
|
||||
rhs_(2) = rhs_(2) * pivot__3
|
||||
coeff__2 = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
|
||||
pivot__3 = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
|
||||
rhs_(3) = rhs_(3) * pivot__3
|
||||
coeff__2 = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
|
||||
pivot__3 = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
|
||||
rhs_(4) = rhs_(4) * pivot__3
|
||||
coeff__2 = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
|
||||
pivot__3 = 1.00d0 / lhs_(5,5,2)
|
||||
rhs_(5) = rhs_(5) * pivot__3
|
||||
coeff__2 = lhs_(1,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(2,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(3,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(4,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
|
||||
|
||||
! endif
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
|
||||
! enddo
|
||||
do j = problem_size - 2,0,(-(1))
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k) * rhs(1,i,j + 1,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k) * rhs(2,i,j + 1,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k) * rhs(3,i,j + 1,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k) * rhs(4,i,j + 1,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k) * rhs(5,i,j + 1,k)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,635 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! performs guaussian elimination on this cell.
|
||||
!
|
||||
! assumes that unpacking routines for non-first cells
|
||||
! preload C' and rhs' from previous cell.
|
||||
!
|
||||
! assumed send happens outside this routine, but that
|
||||
! c'(JMAX) and rhs'(JMAX) will be sent to next cell
|
||||
!---------------------------------------------------------------------
|
||||
subroutine y_solve ()
|
||||
|
||||
include 'header3d.h'
|
||||
double precision coeff
|
||||
double precision pivot
|
||||
integer i__0
|
||||
integer j__1,m,n
|
||||
double precision coeff__2
|
||||
double precision pivot__3
|
||||
double precision lhs_(5,5,3),rhs_(5),u_(0:3,5)
|
||||
integer i,j,k,jsize,low_k,high_k,k1,maxBlK
|
||||
jsize = problem_size - 1
|
||||
|
||||
if(mod((problem_size - 2), BL) .eq. 0) then
|
||||
maxBlK = (problem_size - 2) / BL
|
||||
else
|
||||
maxblK = (problem_size - 2) / BL + 1
|
||||
endif
|
||||
!ACROSS (rhs(0:0,1:0,0:0,0:0),lhs__(0:0,1:0,0:0,0:0,0:0))
|
||||
!DVM$ region local(lhs__)
|
||||
!DVM$ PARALLEL (k1,i) ON rhs(*,i,*,k1), private(u_,j,rhs_,pivot,
|
||||
!DVM$&coeff,tmp1, tmp2, tmp3, t1, t2, t3, tm1, tm2, tm3, tmp11,
|
||||
!DVM$&tmp22, i__0, j__1,lhs_,coeff__2,pivot__3,m,n,k,low_k,high_k)
|
||||
!DVM$&,cuda_block(32)
|
||||
do k1 = 1, maxBlK
|
||||
do i = 1, problem_size - 2
|
||||
low_k = (k1 - 1) * BL + 1
|
||||
high_k = k1 * BL
|
||||
if(high_k .gt. problem_size - 2) then
|
||||
high_k = problem_size - 2
|
||||
endif
|
||||
do k = low_k, high_k
|
||||
do m = 1,5
|
||||
u_(0,m) = u(m,i,0,k)
|
||||
u_(1,m) = u(m,i,1,k)
|
||||
enddo
|
||||
do j = 1,jsize - 1
|
||||
do m = 1,5
|
||||
u_(2,m) = u(m,i,j + 1,k)
|
||||
enddo
|
||||
|
||||
! if(j .ne. jsize) then
|
||||
tmp1 = 1.0d+00 / u_(1,1)
|
||||
tmp2 = tmp1 * tmp1
|
||||
tmp3 = tmp1 * tmp2
|
||||
t1 = 1.0d+00 / u_(0,1)
|
||||
t2 = t1 * t1
|
||||
t3 = t1 * t2
|
||||
tm1 = 1.0d+00 / u_(2,1)
|
||||
tm2 = tm1 * tm1
|
||||
tm3 = tm1 * tm2
|
||||
tmp11 = dt * ty1
|
||||
tmp22 = dt * ty2
|
||||
lhs_(1,1,1) = (-(tmp11)) * dy1
|
||||
lhs_(1,2,1) = 0.
|
||||
lhs_(1,3,1) = (-(tmp22))
|
||||
lhs_(1,4,1) = 0.
|
||||
lhs_(1,5,1) = 0.
|
||||
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,3))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,2))
|
||||
lhs_(2,2,1) = (-(tmp22)) * u_(0,3) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dy2
|
||||
lhs_(2,3,1) = (-(tmp22)) * u_(0,2) * t1
|
||||
lhs_(2,4,1) = 0.
|
||||
lhs_(2,5,1) = 0.
|
||||
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,3) * t2)) +
|
||||
& 0.50d+00 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
|
||||
& * u_(0,4)) * t2)) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,3))
|
||||
lhs_(3,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * t1)
|
||||
lhs_(3,3,1) = (-(tmp22)) * ((2.0d+00 - c2) * u_(0,3) * t1
|
||||
&) - tmp11 * con43 * c3c4 * t1 - tmp11 * dy3
|
||||
lhs_(3,4,1) = (-(tmp22)) * ((-(c2)) * u_(0,4) * t1)
|
||||
lhs_(3,5,1) = (-(tmp22)) * c2
|
||||
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,4))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,4))
|
||||
lhs_(4,2,1) = 0.
|
||||
lhs_(4,3,1) = (-(tmp22)) * u_(0,4) * t1
|
||||
lhs_(4,4,1) = (-(tmp22)) * u_(0,3) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dy4
|
||||
lhs_(4,5,1) = 0.
|
||||
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
|
||||
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * u_(0,5) * t1) * u
|
||||
&_(0,3) * t1) - tmp11 * ((-(c3c4 - c1345)) * t3 * u_(0,2)** 2 - (co
|
||||
&n43 * c3c4 - c1345) * t3 * u_(0,3)** 2 - (c3c4 - c1345) * t3 * u_(
|
||||
&0,4)** 2 - c1345 * t2 * u_(0,5))
|
||||
lhs_(5,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * u_(0,3) *
|
||||
& t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,2)
|
||||
lhs_(5,3,1) = (-(tmp22)) * (c1 * u_(0,5) * t1 - 0.50d+00
|
||||
&* c2 * ((u_(0,2) * u_(0,2) + 3.0d+00 * u_(0,3) * u_(0,3) + u_(0,4)
|
||||
& * u_(0,4)) * t2)) - tmp11 * (con43 * c3c4 - c1345) * t2 * u_(0,3)
|
||||
lhs_(5,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,4))
|
||||
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,4)
|
||||
lhs_(5,5,1) = (-(tmp22)) * c1 * u_(0,3) * t1 - tmp11 * c1
|
||||
&345 * t1 - tmp11 * dy5
|
||||
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dy1
|
||||
lhs_(1,2,2) = 0.
|
||||
lhs_(1,3,2) = 0.
|
||||
lhs_(1,4,2) = 0.
|
||||
lhs_(1,5,2) = 0.
|
||||
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&2))
|
||||
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dy2
|
||||
lhs_(2,3,2) = 0.
|
||||
lhs_(2,4,2) = 0.
|
||||
lhs_(2,5,2) = 0.
|
||||
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
|
||||
& * u_(1,3))
|
||||
lhs_(3,2,2) = 0.
|
||||
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3c4 *
|
||||
&tmp1 + tmp11 * 2.0d+00 * dy3
|
||||
lhs_(3,4,2) = 0.
|
||||
lhs_(3,5,2) = 0.
|
||||
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&4))
|
||||
lhs_(4,2,2) = 0.
|
||||
lhs_(4,3,2) = 0.
|
||||
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dy4
|
||||
lhs_(4,5,2) = 0.
|
||||
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(c3c4 - c1345)) * tmp3
|
||||
& * u_(1,2)** 2 - (con43 * c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (c3
|
||||
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
|
||||
lhs_(5,2,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,2)
|
||||
lhs_(5,3,2) = tmp11 * 2.0d+00 * (con43 * c3c4 - c1345) *
|
||||
&tmp2 * u_(1,3)
|
||||
lhs_(5,4,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,4)
|
||||
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
|
||||
&tmp11 * 2.0d+00 * dy5
|
||||
if (j .ne. 1) then
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
enddo
|
||||
endif
|
||||
lhs_(1,1,3) = (-(tmp11)) * dy1
|
||||
lhs_(1,2,3) = 0.
|
||||
lhs_(1,3,3) = tmp22
|
||||
lhs_(1,4,3) = 0.
|
||||
lhs_(1,5,3) = 0.
|
||||
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * u_(2,3))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,2))
|
||||
lhs_(2,2,3) = tmp22 * u_(2,3) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dy2
|
||||
lhs_(2,3,3) = tmp22 * u_(2,2) * tm1
|
||||
lhs_(2,4,3) = 0.
|
||||
lhs_(2,5,3) = 0.
|
||||
lhs_(3,1,3) = tmp22 * ((-(u_(2,3) * u_(2,3) * tm2)) + 0.5
|
||||
&0d+00 * c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u
|
||||
&_(2,4)) * tm2)) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,3))
|
||||
lhs_(3,2,3) = tmp22 * ((-(c2)) * u_(2,2) * tm1)
|
||||
lhs_(3,3,3) = tmp22 * ((2.0d+00 - c2) * u_(2,3) * tm1) -
|
||||
&tmp11 * con43 * c3c4 * tm1 - tmp11 * dy3
|
||||
lhs_(3,4,3) = tmp22 * ((-(c2)) * u_(2,4) * tm1)
|
||||
lhs_(3,5,3) = tmp22 * c2
|
||||
lhs_(4,1,3) = tmp22 * ((-(u_(2,3) * u_(2,4))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,4))
|
||||
lhs_(4,2,3) = 0.
|
||||
lhs_(4,3,3) = tmp22 * u_(2,4) * tm1
|
||||
lhs_(4,4,3) = tmp22 * u_(2,3) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dy4
|
||||
lhs_(4,5,3) = 0.
|
||||
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
|
||||
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * u_(2,5) * tm1) * u_(2
|
||||
&,3) * tm1) - tmp11 * ((-(c3c4 - c1345)) * tm3 * u_(2,2)** 2 - (con
|
||||
&43 * c3c4 - c1345) * tm3 * u_(2,3)** 2 - (c3c4 - c1345) * tm3 * u_
|
||||
&(2,4)** 2 - c1345 * tm2 * u_(2,5))
|
||||
lhs_(5,2,3) = tmp22 * ((-(c2)) * u_(2,2) * u_(2,3) * tm2)
|
||||
& - tmp11 * (c3c4 - c1345) * tm2 * u_(2,2)
|
||||
lhs_(5,3,3) = tmp22 * (c1 * u_(2,5) * tm1 - 0.50d+00 * c2
|
||||
& * ((u_(2,2) * u_(2,2) + 3.0d+00 * u_(2,3) * u_(2,3) + u_(2,4) * u
|
||||
&_(2,4)) * tm2)) - tmp11 * (con43 * c3c4 - c1345) * tm2 * u_(2,3)
|
||||
lhs_(5,4,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,4)) * tm
|
||||
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,4)
|
||||
lhs_(5,5,3) = tmp22 * c1 * u_(2,3) * tm1 - tmp11 * c1345
|
||||
&* tm1 - tmp11 * dy5
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j -
|
||||
& 1,k) - lhs_(i__0,2,1) * rhs(2,i,j - 1,k) - lhs_(i__0,3,1) * rhs(3
|
||||
&,i,j - 1,k) - lhs_(i__0,4,1) * rhs(4,i,j - 1,k) - lhs_(i__0,5,1) *
|
||||
& rhs(5,i,j - 1,k)
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
pivot = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot
|
||||
lhs_(1,1,3) = lhs_(1,1,3) * pivot
|
||||
lhs_(1,2,3) = lhs_(1,2,3) * pivot
|
||||
lhs_(1,3,3) = lhs_(1,3,3) * pivot
|
||||
lhs_(1,4,3) = lhs_(1,4,3) * pivot
|
||||
lhs_(1,5,3) = lhs_(1,5,3) * pivot
|
||||
rhs_(1) = rhs_(1) * pivot
|
||||
coeff = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(1)
|
||||
coeff = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(1)
|
||||
coeff = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(1)
|
||||
coeff = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(1)
|
||||
pivot = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot
|
||||
lhs_(2,1,3) = lhs_(2,1,3) * pivot
|
||||
lhs_(2,2,3) = lhs_(2,2,3) * pivot
|
||||
lhs_(2,3,3) = lhs_(2,3,3) * pivot
|
||||
lhs_(2,4,3) = lhs_(2,4,3) * pivot
|
||||
lhs_(2,5,3) = lhs_(2,5,3) * pivot
|
||||
rhs_(2) = rhs_(2) * pivot
|
||||
coeff = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(2)
|
||||
coeff = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(2)
|
||||
coeff = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(2)
|
||||
coeff = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(2)
|
||||
pivot = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot
|
||||
lhs_(3,1,3) = lhs_(3,1,3) * pivot
|
||||
lhs_(3,2,3) = lhs_(3,2,3) * pivot
|
||||
lhs_(3,3,3) = lhs_(3,3,3) * pivot
|
||||
lhs_(3,4,3) = lhs_(3,4,3) * pivot
|
||||
lhs_(3,5,3) = lhs_(3,5,3) * pivot
|
||||
rhs_(3) = rhs_(3) * pivot
|
||||
coeff = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(3)
|
||||
coeff = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(3)
|
||||
coeff = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(3)
|
||||
coeff = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(3)
|
||||
pivot = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot
|
||||
lhs_(4,1,3) = lhs_(4,1,3) * pivot
|
||||
lhs_(4,2,3) = lhs_(4,2,3) * pivot
|
||||
lhs_(4,3,3) = lhs_(4,3,3) * pivot
|
||||
lhs_(4,4,3) = lhs_(4,4,3) * pivot
|
||||
lhs_(4,5,3) = lhs_(4,5,3) * pivot
|
||||
rhs_(4) = rhs_(4) * pivot
|
||||
coeff = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(4)
|
||||
coeff = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(4)
|
||||
coeff = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(4)
|
||||
coeff = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(4)
|
||||
pivot = 1.00d0 / lhs_(5,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) * pivot
|
||||
lhs_(5,2,3) = lhs_(5,2,3) * pivot
|
||||
lhs_(5,3,3) = lhs_(5,3,3) * pivot
|
||||
lhs_(5,4,3) = lhs_(5,4,3) * pivot
|
||||
lhs_(5,5,3) = lhs_(5,5,3) * pivot
|
||||
rhs_(5) = rhs_(5) * pivot
|
||||
coeff = lhs_(1,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(5)
|
||||
coeff = lhs_(2,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(5)
|
||||
coeff = lhs_(3,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(5)
|
||||
coeff = lhs_(4,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(5)
|
||||
do i__0 = 1,5
|
||||
lhs__(i__0,1,i,j,k1) = lhs_(i__0,1,3)
|
||||
lhs__(i__0,2,i,j,k1) = lhs_(i__0,2,3)
|
||||
lhs__(i__0,3,i,j,k1) = lhs_(i__0,3,3)
|
||||
lhs__(i__0,4,i,j,k1) = lhs_(i__0,4,3)
|
||||
lhs__(i__0,5,i,j,k1) = lhs_(i__0,5,3)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
u_(0,m) = u_(1,m)
|
||||
u_(1,m) = u_(2,m)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
! else ! ******************* else case *************************
|
||||
do n = 1,5
|
||||
lhs_(1,n,1) = 0.0d0
|
||||
lhs_(1,n,2) = 0.0d0
|
||||
lhs_(1,n,3) = 0.0d0
|
||||
lhs_(2,n,1) = 0.0d0
|
||||
lhs_(2,n,2) = 0.0d0
|
||||
lhs_(2,n,3) = 0.0d0
|
||||
lhs_(3,n,1) = 0.0d0
|
||||
lhs_(3,n,2) = 0.0d0
|
||||
lhs_(3,n,3) = 0.0d0
|
||||
lhs_(4,n,1) = 0.0d0
|
||||
lhs_(4,n,2) = 0.0d0
|
||||
lhs_(4,n,3) = 0.0d0
|
||||
lhs_(5,n,1) = 0.0d0
|
||||
lhs_(5,n,2) = 0.0d0
|
||||
lhs_(5,n,3) = 0.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
lhs_(m,m,2) = 1.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,jsize
|
||||
&- 1,k) - lhs_(i__0,2,1) * rhs(2,i,jsize - 1,k) - lhs_(i__0,3,1) *
|
||||
&rhs(3,i,jsize - 1,k) - lhs_(i__0,4,1) * rhs(4,i,jsize - 1,k) - lhs
|
||||
&_(i__0,5,1) * rhs(5,i,jsize - 1,k)
|
||||
enddo
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
|
||||
enddo
|
||||
pivot__3 = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
|
||||
rhs_(1) = rhs_(1) * pivot__3
|
||||
coeff__2 = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
|
||||
pivot__3 = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
|
||||
rhs_(2) = rhs_(2) * pivot__3
|
||||
coeff__2 = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
|
||||
pivot__3 = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
|
||||
rhs_(3) = rhs_(3) * pivot__3
|
||||
coeff__2 = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
|
||||
pivot__3 = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
|
||||
rhs_(4) = rhs_(4) * pivot__3
|
||||
coeff__2 = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
|
||||
pivot__3 = 1.00d0 / lhs_(5,5,2)
|
||||
rhs_(5) = rhs_(5) * pivot__3
|
||||
coeff__2 = lhs_(1,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(2,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(3,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(4,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
|
||||
|
||||
! endif
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
|
||||
! enddo
|
||||
do j = problem_size - 2,0,(-(1))
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k1) * rhs(1,i,j + 1,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k1) * rhs(2,i,j + 1,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k1) * rhs(3,i,j + 1,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k1) * rhs(4,i,j + 1,k)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k1) * rhs(5,i,j + 1,k)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,634 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! performs guaussian elimination on this cell.
|
||||
!
|
||||
! assumes that unpacking routines for non-first cells
|
||||
! preload C' and rhs' from previous cell.
|
||||
!
|
||||
! assumed send happens outside this routine, but that
|
||||
! c'(JMAX) and rhs'(JMAX) will be sent to next cell
|
||||
!---------------------------------------------------------------------
|
||||
subroutine y_solve ()
|
||||
|
||||
include 'header3d.h'
|
||||
double precision coeff
|
||||
double precision pivot
|
||||
integer i__0
|
||||
integer j__1,m,n
|
||||
double precision coeff__2
|
||||
double precision pivot__3
|
||||
double precision lhs_(5,5,3),rhs_(5),rhsp_(5),u_(0:3,5)
|
||||
integer i,j,k,jsize,jstart
|
||||
jstart = 0
|
||||
jsize = problem_size - 1
|
||||
|
||||
!DVM$ region local(lhs__)
|
||||
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), private(u_,rhs_,pivot,
|
||||
!DVM$&coeff,tmp1, tmp2, tmp3, t1, t2, t3, tm1, tm2, tm3, tmp11,
|
||||
!DVM$&tmp22, i__0, j__1,lhs_,coeff__2,pivot__3,m,n),stage(stage_n),
|
||||
!DVM$& ACROSS(rhs(0:0,0:0,1:0,0:0),lhs__(0:0,0:0,0:0,1:0,0:0))
|
||||
do k = 1,problem_size - 2
|
||||
do j = 1,jsize - 1
|
||||
do i = 1,problem_size - 2
|
||||
if(j .ne. jsize) then
|
||||
|
||||
do m = 1,5
|
||||
lhs_(m,1,3) = lhs__(m,1,i,j-1,k)
|
||||
lhs_(m,2,3) = lhs__(m,2,i,j-1,k)
|
||||
lhs_(m,3,3) = lhs__(m,3,i,j-1,k)
|
||||
lhs_(m,4,3) = lhs__(m,4,i,j-1,k)
|
||||
lhs_(m,5,3) = lhs__(m,5,i,j-1,k)
|
||||
|
||||
u_(0,m) = u(m,i,j-1,k)
|
||||
u_(1,m) = u(m,i,j,k)
|
||||
u_(2,m) = u(m,i,j+1,k)
|
||||
enddo
|
||||
|
||||
tmp1 = 1.0d+00 / u_(1,1)
|
||||
tmp2 = tmp1 * tmp1
|
||||
tmp3 = tmp1 * tmp2
|
||||
t1 = 1.0d+00 / u_(0,1)
|
||||
t2 = t1 * t1
|
||||
t3 = t1 * t2
|
||||
tm1 = 1.0d+00 / u_(2,1)
|
||||
tm2 = tm1 * tm1
|
||||
tm3 = tm1 * tm2
|
||||
tmp11 = dt * ty1
|
||||
tmp22 = dt * ty2
|
||||
lhs_(1,1,1) = (-(tmp11)) * dy1
|
||||
lhs_(1,2,1) = 0.
|
||||
lhs_(1,3,1) = (-(tmp22))
|
||||
lhs_(1,4,1) = 0.
|
||||
lhs_(1,5,1) = 0.
|
||||
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,3))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,2))
|
||||
lhs_(2,2,1) = (-(tmp22)) * u_(0,3) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dy2
|
||||
lhs_(2,3,1) = (-(tmp22)) * u_(0,2) * t1
|
||||
lhs_(2,4,1) = 0.
|
||||
lhs_(2,5,1) = 0.
|
||||
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,3) * t2)) +
|
||||
& 0.50d+00 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
|
||||
& * u_(0,4)) * t2)) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,3))
|
||||
lhs_(3,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * t1)
|
||||
lhs_(3,3,1) = (-(tmp22)) * ((2.0d+00 - c2) * u_(0,3) * t1
|
||||
&) - tmp11 * con43 * c3c4 * t1 - tmp11 * dy3
|
||||
lhs_(3,4,1) = (-(tmp22)) * ((-(c2)) * u_(0,4) * t1)
|
||||
lhs_(3,5,1) = (-(tmp22)) * c2
|
||||
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,4))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,4))
|
||||
lhs_(4,2,1) = 0.
|
||||
lhs_(4,3,1) = (-(tmp22)) * u_(0,4) * t1
|
||||
lhs_(4,4,1) = (-(tmp22)) * u_(0,3) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dy4
|
||||
lhs_(4,5,1) = 0.
|
||||
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
|
||||
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * u_(0,5) * t1) * u
|
||||
&_(0,3) * t1) - tmp11 * ((-(c3c4 - c1345)) * t3 * u_(0,2)** 2 - (co
|
||||
&n43 * c3c4 - c1345) * t3 * u_(0,3)** 2 - (c3c4 - c1345) * t3 * u_(
|
||||
&0,4)** 2 - c1345 * t2 * u_(0,5))
|
||||
lhs_(5,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * u_(0,3) *
|
||||
& t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,2)
|
||||
lhs_(5,3,1) = (-(tmp22)) * (c1 * u_(0,5) * t1 - 0.50d+00
|
||||
&* c2 * ((u_(0,2) * u_(0,2) + 3.0d+00 * u_(0,3) * u_(0,3) + u_(0,4)
|
||||
& * u_(0,4)) * t2)) - tmp11 * (con43 * c3c4 - c1345) * t2 * u_(0,3)
|
||||
lhs_(5,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,4))
|
||||
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,4)
|
||||
lhs_(5,5,1) = (-(tmp22)) * c1 * u_(0,3) * t1 - tmp11 * c1
|
||||
&345 * t1 - tmp11 * dy5
|
||||
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dy1
|
||||
lhs_(1,2,2) = 0.
|
||||
lhs_(1,3,2) = 0.
|
||||
lhs_(1,4,2) = 0.
|
||||
lhs_(1,5,2) = 0.
|
||||
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&2))
|
||||
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dy2
|
||||
lhs_(2,3,2) = 0.
|
||||
lhs_(2,4,2) = 0.
|
||||
lhs_(2,5,2) = 0.
|
||||
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
|
||||
& * u_(1,3))
|
||||
lhs_(3,2,2) = 0.
|
||||
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3c4 *
|
||||
&tmp1 + tmp11 * 2.0d+00 * dy3
|
||||
lhs_(3,4,2) = 0.
|
||||
lhs_(3,5,2) = 0.
|
||||
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&4))
|
||||
lhs_(4,2,2) = 0.
|
||||
lhs_(4,3,2) = 0.
|
||||
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dy4
|
||||
lhs_(4,5,2) = 0.
|
||||
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(c3c4 - c1345)) * tmp3
|
||||
& * u_(1,2)** 2 - (con43 * c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (c3
|
||||
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
|
||||
lhs_(5,2,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,2)
|
||||
lhs_(5,3,2) = tmp11 * 2.0d+00 * (con43 * c3c4 - c1345) *
|
||||
&tmp2 * u_(1,3)
|
||||
lhs_(5,4,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,4)
|
||||
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
|
||||
&tmp11 * 2.0d+00 * dy5
|
||||
if (j .ne. 1) then
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
enddo
|
||||
endif
|
||||
lhs_(1,1,3) = (-(tmp11)) * dy1
|
||||
lhs_(1,2,3) = 0.
|
||||
lhs_(1,3,3) = tmp22
|
||||
lhs_(1,4,3) = 0.
|
||||
lhs_(1,5,3) = 0.
|
||||
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * u_(2,3))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,2))
|
||||
lhs_(2,2,3) = tmp22 * u_(2,3) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dy2
|
||||
lhs_(2,3,3) = tmp22 * u_(2,2) * tm1
|
||||
lhs_(2,4,3) = 0.
|
||||
lhs_(2,5,3) = 0.
|
||||
lhs_(3,1,3) = tmp22 * ((-(u_(2,3) * u_(2,3) * tm2)) + 0.5
|
||||
&0d+00 * c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u
|
||||
&_(2,4)) * tm2)) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,3))
|
||||
lhs_(3,2,3) = tmp22 * ((-(c2)) * u_(2,2) * tm1)
|
||||
lhs_(3,3,3) = tmp22 * ((2.0d+00 - c2) * u_(2,3) * tm1) -
|
||||
&tmp11 * con43 * c3c4 * tm1 - tmp11 * dy3
|
||||
lhs_(3,4,3) = tmp22 * ((-(c2)) * u_(2,4) * tm1)
|
||||
lhs_(3,5,3) = tmp22 * c2
|
||||
lhs_(4,1,3) = tmp22 * ((-(u_(2,3) * u_(2,4))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,4))
|
||||
lhs_(4,2,3) = 0.
|
||||
lhs_(4,3,3) = tmp22 * u_(2,4) * tm1
|
||||
lhs_(4,4,3) = tmp22 * u_(2,3) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dy4
|
||||
lhs_(4,5,3) = 0.
|
||||
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
|
||||
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * u_(2,5) * tm1) * u_(2
|
||||
&,3) * tm1) - tmp11 * ((-(c3c4 - c1345)) * tm3 * u_(2,2)** 2 - (con
|
||||
&43 * c3c4 - c1345) * tm3 * u_(2,3)** 2 - (c3c4 - c1345) * tm3 * u_
|
||||
&(2,4)** 2 - c1345 * tm2 * u_(2,5))
|
||||
lhs_(5,2,3) = tmp22 * ((-(c2)) * u_(2,2) * u_(2,3) * tm2)
|
||||
& - tmp11 * (c3c4 - c1345) * tm2 * u_(2,2)
|
||||
lhs_(5,3,3) = tmp22 * (c1 * u_(2,5) * tm1 - 0.50d+00 * c2
|
||||
& * ((u_(2,2) * u_(2,2) + 3.0d+00 * u_(2,3) * u_(2,3) + u_(2,4) * u
|
||||
&_(2,4)) * tm2)) - tmp11 * (con43 * c3c4 - c1345) * tm2 * u_(2,3)
|
||||
lhs_(5,4,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,4)) * tm
|
||||
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,4)
|
||||
lhs_(5,5,3) = tmp22 * c1 * u_(2,3) * tm1 - tmp11 * c1345
|
||||
&* tm1 - tmp11 * dy5
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j -
|
||||
& 1,k) - lhs_(i__0,2,1) * rhs(2,i,j - 1,k) - lhs_(i__0,3,1) * rhs(3
|
||||
&,i,j - 1,k) - lhs_(i__0,4,1) * rhs(4,i,j - 1,k) - lhs_(i__0,5,1) *
|
||||
& rhs(5,i,j - 1,k)
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
pivot = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot
|
||||
lhs_(1,1,3) = lhs_(1,1,3) * pivot
|
||||
lhs_(1,2,3) = lhs_(1,2,3) * pivot
|
||||
lhs_(1,3,3) = lhs_(1,3,3) * pivot
|
||||
lhs_(1,4,3) = lhs_(1,4,3) * pivot
|
||||
lhs_(1,5,3) = lhs_(1,5,3) * pivot
|
||||
rhs_(1) = rhs_(1) * pivot
|
||||
coeff = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(1)
|
||||
coeff = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(1)
|
||||
coeff = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(1)
|
||||
coeff = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(1)
|
||||
pivot = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot
|
||||
lhs_(2,1,3) = lhs_(2,1,3) * pivot
|
||||
lhs_(2,2,3) = lhs_(2,2,3) * pivot
|
||||
lhs_(2,3,3) = lhs_(2,3,3) * pivot
|
||||
lhs_(2,4,3) = lhs_(2,4,3) * pivot
|
||||
lhs_(2,5,3) = lhs_(2,5,3) * pivot
|
||||
rhs_(2) = rhs_(2) * pivot
|
||||
coeff = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(2)
|
||||
coeff = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(2)
|
||||
coeff = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(2)
|
||||
coeff = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(2)
|
||||
pivot = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot
|
||||
lhs_(3,1,3) = lhs_(3,1,3) * pivot
|
||||
lhs_(3,2,3) = lhs_(3,2,3) * pivot
|
||||
lhs_(3,3,3) = lhs_(3,3,3) * pivot
|
||||
lhs_(3,4,3) = lhs_(3,4,3) * pivot
|
||||
lhs_(3,5,3) = lhs_(3,5,3) * pivot
|
||||
rhs_(3) = rhs_(3) * pivot
|
||||
coeff = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(3)
|
||||
coeff = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(3)
|
||||
coeff = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(3)
|
||||
coeff = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(3)
|
||||
pivot = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot
|
||||
lhs_(4,1,3) = lhs_(4,1,3) * pivot
|
||||
lhs_(4,2,3) = lhs_(4,2,3) * pivot
|
||||
lhs_(4,3,3) = lhs_(4,3,3) * pivot
|
||||
lhs_(4,4,3) = lhs_(4,4,3) * pivot
|
||||
lhs_(4,5,3) = lhs_(4,5,3) * pivot
|
||||
rhs_(4) = rhs_(4) * pivot
|
||||
coeff = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(4)
|
||||
coeff = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(4)
|
||||
coeff = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(4)
|
||||
coeff = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(4)
|
||||
pivot = 1.00d0 / lhs_(5,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) * pivot
|
||||
lhs_(5,2,3) = lhs_(5,2,3) * pivot
|
||||
lhs_(5,3,3) = lhs_(5,3,3) * pivot
|
||||
lhs_(5,4,3) = lhs_(5,4,3) * pivot
|
||||
lhs_(5,5,3) = lhs_(5,5,3) * pivot
|
||||
rhs_(5) = rhs_(5) * pivot
|
||||
coeff = lhs_(1,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(5)
|
||||
coeff = lhs_(2,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(5)
|
||||
coeff = lhs_(3,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(5)
|
||||
coeff = lhs_(4,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(5)
|
||||
do i__0 = 1,5
|
||||
lhs__(i__0,1,i,j,k) = lhs_(i__0,1,3)
|
||||
lhs__(i__0,2,i,j,k) = lhs_(i__0,2,3)
|
||||
lhs__(i__0,3,i,j,k) = lhs_(i__0,3,3)
|
||||
lhs__(i__0,4,i,j,k) = lhs_(i__0,4,3)
|
||||
lhs__(i__0,5,i,j,k) = lhs_(i__0,5,3)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
! enddo
|
||||
|
||||
else ! ******************* else case *************************
|
||||
do n = 1,5
|
||||
lhs_(1,n,1) = 0.0d0
|
||||
lhs_(1,n,2) = 0.0d0
|
||||
lhs_(1,n,3) = 0.0d0
|
||||
lhs_(2,n,1) = 0.0d0
|
||||
lhs_(2,n,2) = 0.0d0
|
||||
lhs_(2,n,3) = 0.0d0
|
||||
lhs_(3,n,1) = 0.0d0
|
||||
lhs_(3,n,2) = 0.0d0
|
||||
lhs_(3,n,3) = 0.0d0
|
||||
lhs_(4,n,1) = 0.0d0
|
||||
lhs_(4,n,2) = 0.0d0
|
||||
lhs_(4,n,3) = 0.0d0
|
||||
lhs_(5,n,1) = 0.0d0
|
||||
lhs_(5,n,2) = 0.0d0
|
||||
lhs_(5,n,3) = 0.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
lhs_(m,m,2) = 1.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,jsize
|
||||
&- 1,k) - lhs_(i__0,2,1) * rhs(2,i,jsize - 1,k) - lhs_(i__0,3,1) *
|
||||
&rhs(3,i,jsize - 1,k) - lhs_(i__0,4,1) * rhs(4,i,jsize - 1,k) - lhs
|
||||
&_(i__0,5,1) * rhs(5,i,jsize - 1,k)
|
||||
enddo
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
|
||||
enddo
|
||||
pivot__3 = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
|
||||
rhs_(1) = rhs_(1) * pivot__3
|
||||
coeff__2 = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
|
||||
pivot__3 = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
|
||||
rhs_(2) = rhs_(2) * pivot__3
|
||||
coeff__2 = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
|
||||
pivot__3 = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
|
||||
rhs_(3) = rhs_(3) * pivot__3
|
||||
coeff__2 = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
|
||||
pivot__3 = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
|
||||
rhs_(4) = rhs_(4) * pivot__3
|
||||
coeff__2 = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
|
||||
pivot__3 = 1.00d0 / lhs_(5,5,2)
|
||||
rhs_(5) = rhs_(5) * pivot__3
|
||||
coeff__2 = lhs_(1,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(2,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(3,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(4,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
|
||||
|
||||
endif
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
|
||||
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), private(m, rhs_,rhsp_)
|
||||
!DVM$& ,ACROSS(rhs(0:0,0:0,0:1,0:0)),stage(stage_n)
|
||||
do k = 1,problem_size - 2
|
||||
do j = problem_size - 2,0,(-(1))
|
||||
do i = 1,problem_size - 2
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
rhsp_(m) = rhs(m,i,j+1,k)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k) * rhsp_(1)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k) * rhsp_(2)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k) * rhsp_(3)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k) * rhsp_(4)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k) * rhsp_(5)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,623 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! performs guaussian elimination on this cell.
|
||||
!
|
||||
! assumes that unpacking routines for non-first cells
|
||||
! preload C' and rhs' from previous cell.
|
||||
!
|
||||
! assumed send happens outside this routine, but that
|
||||
! c'(KMAX) and rhs'(KMAX) will be sent to next cell.
|
||||
!---------------------------------------------------------------------
|
||||
subroutine z_solve ()
|
||||
|
||||
include 'header3d.h'
|
||||
double precision coeff
|
||||
double precision pivot
|
||||
integer i__0
|
||||
integer j__1,m,n
|
||||
double precision coeff__2
|
||||
double precision pivot__3
|
||||
double precision lhs_(5,5,3),rhs_(5),rhsp_(5),u_(0:3,5)
|
||||
integer i,j,k,ksize, k1
|
||||
ksize = problem_size - 1
|
||||
|
||||
!DVM$ region local(lhs__)
|
||||
!DVM$ PARALLEL (j,i) ON rhs(*,i,j,*), private(k,u_,rhs_,pivot,
|
||||
!DVM$& coeff, tmp1, tmp2, tmp3, t1, t2, t3, tm1, tm2, tm3,rhsp_,
|
||||
!DVM$& tmp11, tmp22, i__0, j__1,lhs_,n,m,pivot__3,coeff__2)
|
||||
do j = 1,problem_size - 2
|
||||
do i = 1,problem_size - 2
|
||||
do m = 1,5
|
||||
u_(0,m) = u(m,i,j,0)
|
||||
u_(1,m) = u(m,i,j,1)
|
||||
enddo
|
||||
do k = 1,ksize - 1
|
||||
do m = 1,5
|
||||
u_(2,m) = u(m,i,j,k + 1)
|
||||
enddo
|
||||
tmp1 = 1.0d+00 / u_(1,1)
|
||||
tmp2 = tmp1 * tmp1
|
||||
tmp3 = tmp1 * tmp2
|
||||
t1 = 1.0d+00 / u_(0,1)
|
||||
t2 = t1 * t1
|
||||
t3 = t1 * t2
|
||||
tm1 = 1.0d+00 / u_(2,1)
|
||||
tm2 = tm1 * tm1
|
||||
tm3 = tm1 * tm2
|
||||
tmp11 = dt * tz1
|
||||
tmp22 = dt * tz2
|
||||
lhs_(1,1,1) = (-(tmp11)) * dz1
|
||||
lhs_(1,2,1) = 0.
|
||||
lhs_(1,3,1) = 0.
|
||||
lhs_(1,4,1) = (-(tmp22))
|
||||
lhs_(1,5,1) = 0.
|
||||
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,4))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,2))
|
||||
lhs_(2,2,1) = (-(tmp22)) * u_(0,4) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dz2
|
||||
lhs_(2,3,1) = 0.
|
||||
lhs_(2,4,1) = (-(tmp22)) * u_(0,2) * t1
|
||||
lhs_(2,5,1) = 0.
|
||||
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,4))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,3))
|
||||
lhs_(3,2,1) = 0.
|
||||
lhs_(3,3,1) = (-(tmp22)) * u_(0,4) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dz3
|
||||
lhs_(3,4,1) = (-(tmp22)) * u_(0,3) * t1
|
||||
lhs_(3,5,1) = 0.
|
||||
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,4) * u_(0,4) * t2)) +
|
||||
& 0.50d+00 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
|
||||
& * u_(0,4)) * t2)) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,4))
|
||||
lhs_(4,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * t1)
|
||||
lhs_(4,3,1) = (-(tmp22)) * ((-(c2)) * u_(0,3) * t1)
|
||||
lhs_(4,4,1) = (-(tmp22)) * (2.0d+00 - c2) * u_(0,4) * t1
|
||||
&- tmp11 * con43 * c3 * c4 * t1 - tmp11 * dz4
|
||||
lhs_(4,5,1) = (-(tmp22)) * c2
|
||||
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
|
||||
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * (u_(0,5) * t1)) *
|
||||
& (u_(0,4) * t1)) - tmp11 * ((-(c3c4 - c1345)) * t3 * u_(0,2)** 2 -
|
||||
& (c3c4 - c1345) * t3 * u_(0,3)** 2 - (con43 * c3c4 - c1345) * t3 *
|
||||
& u_(0,4)** 2 - c1345 * t2 * u_(0,5))
|
||||
lhs_(5,2,1) = (-(tmp22)) * ((-(c2)) * (u_(0,2) * u_(0,4))
|
||||
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,2)
|
||||
lhs_(5,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,4))
|
||||
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,3)
|
||||
lhs_(5,4,1) = (-(tmp22)) * (c1 * (u_(0,5) * t1) - 0.50d+0
|
||||
&0 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + 3.0d+00 * u_(0,
|
||||
&4) * u_(0,4)) * t2)) - tmp11 * (con43 * c3c4 - c1345) * t2 * u_(0,
|
||||
&4)
|
||||
lhs_(5,5,1) = (-(tmp22)) * c1 * u_(0,4) * t1 - tmp11 * c1
|
||||
&345 * t1 - tmp11 * dz5
|
||||
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dz1
|
||||
lhs_(1,2,2) = 0.
|
||||
lhs_(1,3,2) = 0.
|
||||
lhs_(1,4,2) = 0.
|
||||
lhs_(1,5,2) = 0.
|
||||
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&2))
|
||||
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dz2
|
||||
lhs_(2,3,2) = 0.
|
||||
lhs_(2,4,2) = 0.
|
||||
lhs_(2,5,2) = 0.
|
||||
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&3))
|
||||
lhs_(3,2,2) = 0.
|
||||
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dz3
|
||||
lhs_(3,4,2) = 0.
|
||||
lhs_(3,5,2) = 0.
|
||||
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
|
||||
& * u_(1,4))
|
||||
lhs_(4,2,2) = 0.
|
||||
lhs_(4,3,2) = 0.
|
||||
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3 * c4
|
||||
& * tmp1 + tmp11 * 2.0d+00 * dz4
|
||||
lhs_(4,5,2) = 0.
|
||||
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(c3c4 - c1345)) * tmp3
|
||||
& * u_(1,2)** 2 - (c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (con43 * c3
|
||||
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
|
||||
lhs_(5,2,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,2)
|
||||
lhs_(5,3,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,3)
|
||||
lhs_(5,4,2) = tmp11 * 2.0d+00 * (con43 * c3c4 - c1345) *
|
||||
&tmp2 * u_(1,4)
|
||||
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
|
||||
&tmp11 * 2.0d+00 * dz5
|
||||
if (k .ne. 1) then
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
enddo
|
||||
endif
|
||||
lhs_(1,1,3) = (-(tmp11)) * dz1
|
||||
lhs_(1,2,3) = 0.
|
||||
lhs_(1,3,3) = 0.
|
||||
lhs_(1,4,3) = tmp22
|
||||
lhs_(1,5,3) = 0.
|
||||
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * u_(2,4))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,2))
|
||||
lhs_(2,2,3) = tmp22 * u_(2,4) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dz2
|
||||
lhs_(2,3,3) = 0.
|
||||
lhs_(2,4,3) = tmp22 * u_(2,2) * tm1
|
||||
lhs_(2,5,3) = 0.
|
||||
lhs_(3,1,3) = tmp22 * ((-(u_(2,3) * u_(2,4))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,3))
|
||||
lhs_(3,2,3) = 0.
|
||||
lhs_(3,3,3) = tmp22 * u_(2,4) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dz3
|
||||
lhs_(3,4,3) = tmp22 * u_(2,3) * tm1
|
||||
lhs_(3,5,3) = 0.
|
||||
lhs_(4,1,3) = tmp22 * ((-(u_(2,4) * u_(2,4) * tm2)) + 0.5
|
||||
&0d+00 * c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u
|
||||
&_(2,4)) * tm2)) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,4))
|
||||
lhs_(4,2,3) = tmp22 * ((-(c2)) * u_(2,2) * tm1)
|
||||
lhs_(4,3,3) = tmp22 * ((-(c2)) * u_(2,3) * tm1)
|
||||
lhs_(4,4,3) = tmp22 * (2.0d+00 - c2) * u_(2,4) * tm1 - tm
|
||||
&p11 * con43 * c3 * c4 * tm1 - tmp11 * dz4
|
||||
lhs_(4,5,3) = tmp22 * c2
|
||||
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
|
||||
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * (u_(2,5) * tm1)) * (u
|
||||
&_(2,4) * tm1)) - tmp11 * ((-(c3c4 - c1345)) * tm3 * u_(2,2)** 2 -
|
||||
&(c3c4 - c1345) * tm3 * u_(2,3)** 2 - (con43 * c3c4 - c1345) * tm3
|
||||
&* u_(2,4)** 2 - c1345 * tm2 * u_(2,5))
|
||||
lhs_(5,2,3) = tmp22 * ((-(c2)) * (u_(2,2) * u_(2,4)) * tm
|
||||
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,2)
|
||||
lhs_(5,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,4)) * tm
|
||||
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,3)
|
||||
lhs_(5,4,3) = tmp22 * (c1 * (u_(2,5) * tm1) - 0.50d+00 *
|
||||
&c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + 3.0d+00 * u_(2,4) *
|
||||
& u_(2,4)) * tm2)) - tmp11 * (con43 * c3c4 - c1345) * tm2 * u_(2,4)
|
||||
lhs_(5,5,3) = tmp22 * c1 * u_(2,4) * tm1 - tmp11 * c1345
|
||||
&* tm1 - tmp11 * dz5
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j,k
|
||||
& - 1) - lhs_(i__0,2,1) * rhs(2,i,j,k - 1) - lhs_(i__0,3,1) * rhs(3
|
||||
&,i,j,k - 1) - lhs_(i__0,4,1) * rhs(4,i,j,k - 1) - lhs_(i__0,5,1) *
|
||||
& rhs(5,i,j,k - 1)
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
pivot = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot
|
||||
lhs_(1,1,3) = lhs_(1,1,3) * pivot
|
||||
lhs_(1,2,3) = lhs_(1,2,3) * pivot
|
||||
lhs_(1,3,3) = lhs_(1,3,3) * pivot
|
||||
lhs_(1,4,3) = lhs_(1,4,3) * pivot
|
||||
lhs_(1,5,3) = lhs_(1,5,3) * pivot
|
||||
rhs_(1) = rhs_(1) * pivot
|
||||
coeff = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(1)
|
||||
coeff = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(1)
|
||||
coeff = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(1)
|
||||
coeff = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(1)
|
||||
pivot = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot
|
||||
lhs_(2,1,3) = lhs_(2,1,3) * pivot
|
||||
lhs_(2,2,3) = lhs_(2,2,3) * pivot
|
||||
lhs_(2,3,3) = lhs_(2,3,3) * pivot
|
||||
lhs_(2,4,3) = lhs_(2,4,3) * pivot
|
||||
lhs_(2,5,3) = lhs_(2,5,3) * pivot
|
||||
rhs_(2) = rhs_(2) * pivot
|
||||
coeff = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(2)
|
||||
coeff = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(2)
|
||||
coeff = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(2)
|
||||
coeff = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(2)
|
||||
pivot = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot
|
||||
lhs_(3,1,3) = lhs_(3,1,3) * pivot
|
||||
lhs_(3,2,3) = lhs_(3,2,3) * pivot
|
||||
lhs_(3,3,3) = lhs_(3,3,3) * pivot
|
||||
lhs_(3,4,3) = lhs_(3,4,3) * pivot
|
||||
lhs_(3,5,3) = lhs_(3,5,3) * pivot
|
||||
rhs_(3) = rhs_(3) * pivot
|
||||
coeff = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(3)
|
||||
coeff = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(3)
|
||||
coeff = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(3)
|
||||
coeff = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(3)
|
||||
pivot = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot
|
||||
lhs_(4,1,3) = lhs_(4,1,3) * pivot
|
||||
lhs_(4,2,3) = lhs_(4,2,3) * pivot
|
||||
lhs_(4,3,3) = lhs_(4,3,3) * pivot
|
||||
lhs_(4,4,3) = lhs_(4,4,3) * pivot
|
||||
lhs_(4,5,3) = lhs_(4,5,3) * pivot
|
||||
rhs_(4) = rhs_(4) * pivot
|
||||
coeff = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(4)
|
||||
coeff = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(4)
|
||||
coeff = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(4)
|
||||
coeff = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(4)
|
||||
pivot = 1.00d0 / lhs_(5,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) * pivot
|
||||
lhs_(5,2,3) = lhs_(5,2,3) * pivot
|
||||
lhs_(5,3,3) = lhs_(5,3,3) * pivot
|
||||
lhs_(5,4,3) = lhs_(5,4,3) * pivot
|
||||
lhs_(5,5,3) = lhs_(5,5,3) * pivot
|
||||
rhs_(5) = rhs_(5) * pivot
|
||||
coeff = lhs_(1,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(5)
|
||||
coeff = lhs_(2,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(5)
|
||||
coeff = lhs_(3,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(5)
|
||||
coeff = lhs_(4,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(5)
|
||||
do i__0 = 1,5
|
||||
lhs__(i__0,1,i,j,k) = lhs_(i__0,1,3)
|
||||
lhs__(i__0,2,i,j,k) = lhs_(i__0,2,3)
|
||||
lhs__(i__0,3,i,j,k) = lhs_(i__0,3,3)
|
||||
lhs__(i__0,4,i,j,k) = lhs_(i__0,4,3)
|
||||
lhs__(i__0,5,i,j,k) = lhs_(i__0,5,3)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
u_(0,m) = u_(1,m)
|
||||
u_(1,m) = u_(2,m)
|
||||
enddo
|
||||
enddo
|
||||
do n = 1,5
|
||||
lhs_(1,n,1) = 0.0d0
|
||||
lhs_(1,n,2) = 0.0d0
|
||||
lhs_(1,n,3) = 0.0d0
|
||||
lhs_(2,n,1) = 0.0d0
|
||||
lhs_(2,n,2) = 0.0d0
|
||||
lhs_(2,n,3) = 0.0d0
|
||||
lhs_(3,n,1) = 0.0d0
|
||||
lhs_(3,n,2) = 0.0d0
|
||||
lhs_(3,n,3) = 0.0d0
|
||||
lhs_(4,n,1) = 0.0d0
|
||||
lhs_(4,n,2) = 0.0d0
|
||||
lhs_(4,n,3) = 0.0d0
|
||||
lhs_(5,n,1) = 0.0d0
|
||||
lhs_(5,n,2) = 0.0d0
|
||||
lhs_(5,n,3) = 0.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
lhs_(m,m,2) = 1.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j,ksiz
|
||||
&e - 1) - lhs_(i__0,2,1) * rhs(2,i,j,ksize - 1) - lhs_(i__0,3,1) *
|
||||
&rhs(3,i,j,ksize - 1) - lhs_(i__0,4,1) * rhs(4,i,j,ksize - 1) - lhs
|
||||
&_(i__0,5,1) * rhs(5,i,j,ksize - 1)
|
||||
enddo
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
pivot__3 = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
|
||||
rhs_(1) = rhs_(1) * pivot__3
|
||||
coeff__2 = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
|
||||
pivot__3 = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
|
||||
rhs_(2) = rhs_(2) * pivot__3
|
||||
coeff__2 = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
|
||||
pivot__3 = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
|
||||
rhs_(3) = rhs_(3) * pivot__3
|
||||
coeff__2 = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
|
||||
pivot__3 = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
|
||||
rhs_(4) = rhs_(4) * pivot__3
|
||||
coeff__2 = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
|
||||
pivot__3 = 1.00d0 / lhs_(5,5,2)
|
||||
rhs_(5) = rhs_(5) * pivot__3
|
||||
coeff__2 = lhs_(1,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(2,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(3,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(4,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
|
||||
k = ksize-1
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
rhsp_(m) = rhs(m,i,j,k + 1)
|
||||
enddo
|
||||
do k = ksize-1, 1, (-(1))
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k) * rhsp_(1)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k) * rhsp_(2)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k) * rhsp_(3)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k) * rhsp_(4)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k) * rhsp_(5)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhsp_(m) = rhs_(m)
|
||||
u(m,i,j,k) = u(m,i,j,k) + rhs_(m)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ end region
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,636 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! performs guaussian elimination on this cell.
|
||||
!
|
||||
! assumes that unpacking routines for non-first cells
|
||||
! preload C' and rhs' from previous cell.
|
||||
!
|
||||
! assumed send happens outside this routine, but that
|
||||
! c'(KMAX) and rhs'(KMAX) will be sent to next cell.
|
||||
!---------------------------------------------------------------------
|
||||
subroutine z_solve ()
|
||||
|
||||
include 'header3d.h'
|
||||
double precision coeff
|
||||
double precision pivot
|
||||
integer i__0
|
||||
integer j__1,m,n
|
||||
double precision coeff__2
|
||||
double precision pivot__3
|
||||
double precision lhs_(5,5,3),rhs_(5),rhsp_(5),u_(0:3,5)
|
||||
integer i,j,k,ksize,j1,maxblJ,low_j,high_j
|
||||
ksize = problem_size - 1
|
||||
if(mod((problem_size - 2), BL) .eq. 0) then
|
||||
maxBlJ = (problem_size - 2) / BL
|
||||
else
|
||||
maxblJ = (problem_size - 2) / BL + 1
|
||||
endif
|
||||
!DVM$ region local(lhs__)
|
||||
!DVM$ PARALLEL (j1,i) ON rhs(*,i,j1,*), private(k,u_,rhs_,pivot,
|
||||
!DVM$& coeff, tmp1, tmp2, tmp3, t1, t2, t3, tm1, tm2, tm3,rhsp_,
|
||||
!DVM$& tmp11, tmp22, i__0, j__1,lhs_,n,m,pivot__3,
|
||||
!DVM$& coeff__2,j,low_j,high_j),cuda_block(32)
|
||||
do j1 = 1, maxBlJ
|
||||
do i = 1, problem_size - 2
|
||||
low_j = (j1 - 1) * BL + 1
|
||||
high_j = j1 * BL
|
||||
if(high_j .gt. problem_size - 2) then
|
||||
high_j = problem_size - 2
|
||||
endif
|
||||
do j = low_j, high_j
|
||||
do m = 1,5
|
||||
u_(0,m) = u(m,i,j,0)
|
||||
u_(1,m) = u(m,i,j,1)
|
||||
enddo
|
||||
do k = 1,ksize - 1
|
||||
do m = 1,5
|
||||
u_(2,m) = u(m,i,j,k + 1)
|
||||
enddo
|
||||
tmp1 = 1.0d+00 / u_(1,1)
|
||||
tmp2 = tmp1 * tmp1
|
||||
tmp3 = tmp1 * tmp2
|
||||
t1 = 1.0d+00 / u_(0,1)
|
||||
t2 = t1 * t1
|
||||
t3 = t1 * t2
|
||||
tm1 = 1.0d+00 / u_(2,1)
|
||||
tm2 = tm1 * tm1
|
||||
tm3 = tm1 * tm2
|
||||
tmp11 = dt * tz1
|
||||
tmp22 = dt * tz2
|
||||
lhs_(1,1,1) = (-(tmp11)) * dz1
|
||||
lhs_(1,2,1) = 0.
|
||||
lhs_(1,3,1) = 0.
|
||||
lhs_(1,4,1) = (-(tmp22))
|
||||
lhs_(1,5,1) = 0.
|
||||
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,4))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,2))
|
||||
lhs_(2,2,1) = (-(tmp22)) * u_(0,4) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dz2
|
||||
lhs_(2,3,1) = 0.
|
||||
lhs_(2,4,1) = (-(tmp22)) * u_(0,2) * t1
|
||||
lhs_(2,5,1) = 0.
|
||||
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,4))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,3))
|
||||
lhs_(3,2,1) = 0.
|
||||
lhs_(3,3,1) = (-(tmp22)) * u_(0,4) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dz3
|
||||
lhs_(3,4,1) = (-(tmp22)) * u_(0,3) * t1
|
||||
lhs_(3,5,1) = 0.
|
||||
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,4) * u_(0,4) * t2)) +
|
||||
& 0.50d+00 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
|
||||
& * u_(0,4)) * t2)) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,4))
|
||||
lhs_(4,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * t1)
|
||||
lhs_(4,3,1) = (-(tmp22)) * ((-(c2)) * u_(0,3) * t1)
|
||||
lhs_(4,4,1) = (-(tmp22)) * (2.0d+00 - c2) * u_(0,4) * t1
|
||||
&- tmp11 * con43 * c3 * c4 * t1 - tmp11 * dz4
|
||||
lhs_(4,5,1) = (-(tmp22)) * c2
|
||||
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
|
||||
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * (u_(0,5) * t1)) *
|
||||
& (u_(0,4) * t1)) - tmp11 * ((-(c3c4 - c1345)) * t3 * u_(0,2)** 2 -
|
||||
& (c3c4 - c1345) * t3 * u_(0,3)** 2 - (con43 * c3c4 - c1345) * t3 *
|
||||
& u_(0,4)** 2 - c1345 * t2 * u_(0,5))
|
||||
lhs_(5,2,1) = (-(tmp22)) * ((-(c2)) * (u_(0,2) * u_(0,4))
|
||||
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,2)
|
||||
lhs_(5,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,4))
|
||||
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,3)
|
||||
lhs_(5,4,1) = (-(tmp22)) * (c1 * (u_(0,5) * t1) - 0.50d+0
|
||||
&0 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + 3.0d+00 * u_(0,
|
||||
&4) * u_(0,4)) * t2)) - tmp11 * (con43 * c3c4 - c1345) * t2 * u_(0,
|
||||
&4)
|
||||
lhs_(5,5,1) = (-(tmp22)) * c1 * u_(0,4) * t1 - tmp11 * c1
|
||||
&345 * t1 - tmp11 * dz5
|
||||
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dz1
|
||||
lhs_(1,2,2) = 0.
|
||||
lhs_(1,3,2) = 0.
|
||||
lhs_(1,4,2) = 0.
|
||||
lhs_(1,5,2) = 0.
|
||||
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&2))
|
||||
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dz2
|
||||
lhs_(2,3,2) = 0.
|
||||
lhs_(2,4,2) = 0.
|
||||
lhs_(2,5,2) = 0.
|
||||
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&3))
|
||||
lhs_(3,2,2) = 0.
|
||||
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dz3
|
||||
lhs_(3,4,2) = 0.
|
||||
lhs_(3,5,2) = 0.
|
||||
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
|
||||
& * u_(1,4))
|
||||
lhs_(4,2,2) = 0.
|
||||
lhs_(4,3,2) = 0.
|
||||
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3 * c4
|
||||
& * tmp1 + tmp11 * 2.0d+00 * dz4
|
||||
lhs_(4,5,2) = 0.
|
||||
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(c3c4 - c1345)) * tmp3
|
||||
& * u_(1,2)** 2 - (c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (con43 * c3
|
||||
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
|
||||
lhs_(5,2,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,2)
|
||||
lhs_(5,3,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,3)
|
||||
lhs_(5,4,2) = tmp11 * 2.0d+00 * (con43 * c3c4 - c1345) *
|
||||
&tmp2 * u_(1,4)
|
||||
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
|
||||
&tmp11 * 2.0d+00 * dz5
|
||||
if (k .ne. 1) then
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
enddo
|
||||
endif
|
||||
lhs_(1,1,3) = (-(tmp11)) * dz1
|
||||
lhs_(1,2,3) = 0.
|
||||
lhs_(1,3,3) = 0.
|
||||
lhs_(1,4,3) = tmp22
|
||||
lhs_(1,5,3) = 0.
|
||||
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * u_(2,4))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,2))
|
||||
lhs_(2,2,3) = tmp22 * u_(2,4) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dz2
|
||||
lhs_(2,3,3) = 0.
|
||||
lhs_(2,4,3) = tmp22 * u_(2,2) * tm1
|
||||
lhs_(2,5,3) = 0.
|
||||
lhs_(3,1,3) = tmp22 * ((-(u_(2,3) * u_(2,4))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,3))
|
||||
lhs_(3,2,3) = 0.
|
||||
lhs_(3,3,3) = tmp22 * u_(2,4) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dz3
|
||||
lhs_(3,4,3) = tmp22 * u_(2,3) * tm1
|
||||
lhs_(3,5,3) = 0.
|
||||
lhs_(4,1,3) = tmp22 * ((-(u_(2,4) * u_(2,4) * tm2)) + 0.5
|
||||
&0d+00 * c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u
|
||||
&_(2,4)) * tm2)) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,4))
|
||||
lhs_(4,2,3) = tmp22 * ((-(c2)) * u_(2,2) * tm1)
|
||||
lhs_(4,3,3) = tmp22 * ((-(c2)) * u_(2,3) * tm1)
|
||||
lhs_(4,4,3) = tmp22 * (2.0d+00 - c2) * u_(2,4) * tm1 - tm
|
||||
&p11 * con43 * c3 * c4 * tm1 - tmp11 * dz4
|
||||
lhs_(4,5,3) = tmp22 * c2
|
||||
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
|
||||
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * (u_(2,5) * tm1)) * (u
|
||||
&_(2,4) * tm1)) - tmp11 * ((-(c3c4 - c1345)) * tm3 * u_(2,2)** 2 -
|
||||
&(c3c4 - c1345) * tm3 * u_(2,3)** 2 - (con43 * c3c4 - c1345) * tm3
|
||||
&* u_(2,4)** 2 - c1345 * tm2 * u_(2,5))
|
||||
lhs_(5,2,3) = tmp22 * ((-(c2)) * (u_(2,2) * u_(2,4)) * tm
|
||||
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,2)
|
||||
lhs_(5,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,4)) * tm
|
||||
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,3)
|
||||
lhs_(5,4,3) = tmp22 * (c1 * (u_(2,5) * tm1) - 0.50d+00 *
|
||||
&c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + 3.0d+00 * u_(2,4) *
|
||||
& u_(2,4)) * tm2)) - tmp11 * (con43 * c3c4 - c1345) * tm2 * u_(2,4)
|
||||
lhs_(5,5,3) = tmp22 * c1 * u_(2,4) * tm1 - tmp11 * c1345
|
||||
&* tm1 - tmp11 * dz5
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j,k
|
||||
& - 1) - lhs_(i__0,2,1) * rhs(2,i,j,k - 1) - lhs_(i__0,3,1) * rhs(3
|
||||
&,i,j,k - 1) - lhs_(i__0,4,1) * rhs(4,i,j,k - 1) - lhs_(i__0,5,1) *
|
||||
& rhs(5,i,j,k - 1)
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
pivot = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot
|
||||
lhs_(1,1,3) = lhs_(1,1,3) * pivot
|
||||
lhs_(1,2,3) = lhs_(1,2,3) * pivot
|
||||
lhs_(1,3,3) = lhs_(1,3,3) * pivot
|
||||
lhs_(1,4,3) = lhs_(1,4,3) * pivot
|
||||
lhs_(1,5,3) = lhs_(1,5,3) * pivot
|
||||
rhs_(1) = rhs_(1) * pivot
|
||||
coeff = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(1)
|
||||
coeff = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(1)
|
||||
coeff = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(1)
|
||||
coeff = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(1)
|
||||
pivot = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot
|
||||
lhs_(2,1,3) = lhs_(2,1,3) * pivot
|
||||
lhs_(2,2,3) = lhs_(2,2,3) * pivot
|
||||
lhs_(2,3,3) = lhs_(2,3,3) * pivot
|
||||
lhs_(2,4,3) = lhs_(2,4,3) * pivot
|
||||
lhs_(2,5,3) = lhs_(2,5,3) * pivot
|
||||
rhs_(2) = rhs_(2) * pivot
|
||||
coeff = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(2)
|
||||
coeff = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(2)
|
||||
coeff = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(2)
|
||||
coeff = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(2)
|
||||
pivot = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot
|
||||
lhs_(3,1,3) = lhs_(3,1,3) * pivot
|
||||
lhs_(3,2,3) = lhs_(3,2,3) * pivot
|
||||
lhs_(3,3,3) = lhs_(3,3,3) * pivot
|
||||
lhs_(3,4,3) = lhs_(3,4,3) * pivot
|
||||
lhs_(3,5,3) = lhs_(3,5,3) * pivot
|
||||
rhs_(3) = rhs_(3) * pivot
|
||||
coeff = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(3)
|
||||
coeff = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(3)
|
||||
coeff = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(3)
|
||||
coeff = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(3)
|
||||
pivot = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot
|
||||
lhs_(4,1,3) = lhs_(4,1,3) * pivot
|
||||
lhs_(4,2,3) = lhs_(4,2,3) * pivot
|
||||
lhs_(4,3,3) = lhs_(4,3,3) * pivot
|
||||
lhs_(4,4,3) = lhs_(4,4,3) * pivot
|
||||
lhs_(4,5,3) = lhs_(4,5,3) * pivot
|
||||
rhs_(4) = rhs_(4) * pivot
|
||||
coeff = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(4)
|
||||
coeff = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(4)
|
||||
coeff = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(4)
|
||||
coeff = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(4)
|
||||
pivot = 1.00d0 / lhs_(5,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) * pivot
|
||||
lhs_(5,2,3) = lhs_(5,2,3) * pivot
|
||||
lhs_(5,3,3) = lhs_(5,3,3) * pivot
|
||||
lhs_(5,4,3) = lhs_(5,4,3) * pivot
|
||||
lhs_(5,5,3) = lhs_(5,5,3) * pivot
|
||||
rhs_(5) = rhs_(5) * pivot
|
||||
coeff = lhs_(1,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(5)
|
||||
coeff = lhs_(2,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(5)
|
||||
coeff = lhs_(3,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(5)
|
||||
coeff = lhs_(4,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(5)
|
||||
do i__0 = 1,5
|
||||
lhs__(i__0,1,i,k,j1) = lhs_(i__0,1,3)
|
||||
lhs__(i__0,2,i,k,j1) = lhs_(i__0,2,3)
|
||||
lhs__(i__0,3,i,k,j1) = lhs_(i__0,3,3)
|
||||
lhs__(i__0,4,i,k,j1) = lhs_(i__0,4,3)
|
||||
lhs__(i__0,5,i,k,j1) = lhs_(i__0,5,3)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
u_(0,m) = u_(1,m)
|
||||
u_(1,m) = u_(2,m)
|
||||
enddo
|
||||
enddo
|
||||
do n = 1,5
|
||||
lhs_(1,n,1) = 0.0d0
|
||||
lhs_(1,n,2) = 0.0d0
|
||||
lhs_(1,n,3) = 0.0d0
|
||||
lhs_(2,n,1) = 0.0d0
|
||||
lhs_(2,n,2) = 0.0d0
|
||||
lhs_(2,n,3) = 0.0d0
|
||||
lhs_(3,n,1) = 0.0d0
|
||||
lhs_(3,n,2) = 0.0d0
|
||||
lhs_(3,n,3) = 0.0d0
|
||||
lhs_(4,n,1) = 0.0d0
|
||||
lhs_(4,n,2) = 0.0d0
|
||||
lhs_(4,n,3) = 0.0d0
|
||||
lhs_(5,n,1) = 0.0d0
|
||||
lhs_(5,n,2) = 0.0d0
|
||||
lhs_(5,n,3) = 0.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
lhs_(m,m,2) = 1.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j,ksiz
|
||||
&e - 1) - lhs_(i__0,2,1) * rhs(2,i,j,ksize - 1) - lhs_(i__0,3,1) *
|
||||
&rhs(3,i,j,ksize - 1) - lhs_(i__0,4,1) * rhs(4,i,j,ksize - 1) - lhs
|
||||
&_(i__0,5,1) * rhs(5,i,j,ksize - 1)
|
||||
enddo
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
pivot__3 = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
|
||||
rhs_(1) = rhs_(1) * pivot__3
|
||||
coeff__2 = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
|
||||
pivot__3 = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
|
||||
rhs_(2) = rhs_(2) * pivot__3
|
||||
coeff__2 = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
|
||||
pivot__3 = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
|
||||
rhs_(3) = rhs_(3) * pivot__3
|
||||
coeff__2 = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
|
||||
pivot__3 = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
|
||||
rhs_(4) = rhs_(4) * pivot__3
|
||||
coeff__2 = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
|
||||
pivot__3 = 1.00d0 / lhs_(5,5,2)
|
||||
rhs_(5) = rhs_(5) * pivot__3
|
||||
coeff__2 = lhs_(1,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(2,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(3,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(4,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
|
||||
k = ksize-1
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
rhsp_(m) = rhs(m,i,j,k + 1)
|
||||
enddo
|
||||
do k = ksize-1, 1, (-(1))
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - lhs__(m,1,i,k,j1) * rhsp_(1)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,2,i,k,j1) * rhsp_(2)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,3,i,k,j1) * rhsp_(3)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,4,i,k,j1) * rhsp_(4)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,5,i,k,j1) * rhsp_(5)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhsp_(m) = rhs_(m)
|
||||
u(m,i,j,k) = u(m,i,j,k) + rhs_(m)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ end region
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,640 @@
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! performs guaussian elimination on this cell.
|
||||
!
|
||||
! assumes that unpacking routines for non-first cells
|
||||
! preload C' and rhs' from previous cell.
|
||||
!
|
||||
! assumed send happens outside this routine, but that
|
||||
! c'(KMAX) and rhs'(KMAX) will be sent to next cell.
|
||||
!---------------------------------------------------------------------
|
||||
subroutine z_solve ()
|
||||
|
||||
include 'header3d.h'
|
||||
double precision coeff
|
||||
double precision pivot
|
||||
integer i__0
|
||||
integer j__1,m,n,zst
|
||||
double precision coeff__2
|
||||
double precision pivot__3
|
||||
double precision lhs_(5,5,3),rhs_(5),rhsp_(5),u_(0:3,5)
|
||||
integer i,j,k,ksize, k1
|
||||
ksize = problem_size - 1
|
||||
zst = ksize
|
||||
!DVM$ region local(lhs__)
|
||||
|
||||
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), private(u_,rhs_,pivot,
|
||||
!DVM$& coeff, tmp1, tmp2, tmp3, t1, t2, t3, tm1, tm2, tm3,
|
||||
!DVM$& tmp11, tmp22, i__0, j__1,lhs_,n,m,pivot__3,coeff__2),
|
||||
!DVM$& stage(stage_n)
|
||||
!DVM$& ,ACROSS(rhs(0:0,0:0,0:0,1:0),lhs__(0:0,0:0,0:0,0:0,1:0))
|
||||
do k = 1, problem_size - 1
|
||||
do j = 1,problem_size - 2
|
||||
do i = 1,problem_size - 2
|
||||
|
||||
if( k .ne. problem_size - 1) then
|
||||
do m = 1,5
|
||||
lhs_(m,1,3) = lhs__(m,1,i,j,k-1)
|
||||
lhs_(m,2,3) = lhs__(m,2,i,j,k-1)
|
||||
lhs_(m,3,3) = lhs__(m,3,i,j,k-1)
|
||||
lhs_(m,4,3) = lhs__(m,4,i,j,k-1)
|
||||
lhs_(m,5,3) = lhs__(m,5,i,j,k-1)
|
||||
|
||||
u_(0,m) = u(m,i,j,k - 1)
|
||||
u_(1,m) = u(m,i,j,k)
|
||||
u_(2,m) = u(m,i,j,k + 1)
|
||||
enddo
|
||||
tmp1 = 1.0d+00 / u_(1,1)
|
||||
tmp2 = tmp1 * tmp1
|
||||
tmp3 = tmp1 * tmp2
|
||||
t1 = 1.0d+00 / u_(0,1)
|
||||
t2 = t1 * t1
|
||||
t3 = t1 * t2
|
||||
tm1 = 1.0d+00 / u_(2,1)
|
||||
tm2 = tm1 * tm1
|
||||
tm3 = tm1 * tm2
|
||||
tmp11 = dt * tz1
|
||||
tmp22 = dt * tz2
|
||||
lhs_(1,1,1) = (-(tmp11)) * dz1
|
||||
lhs_(1,2,1) = 0.
|
||||
lhs_(1,3,1) = 0.
|
||||
lhs_(1,4,1) = (-(tmp22))
|
||||
lhs_(1,5,1) = 0.
|
||||
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,4))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,2))
|
||||
lhs_(2,2,1) = (-(tmp22)) * u_(0,4) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dz2
|
||||
lhs_(2,3,1) = 0.
|
||||
lhs_(2,4,1) = (-(tmp22)) * u_(0,2) * t1
|
||||
lhs_(2,5,1) = 0.
|
||||
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,4))) * t2)
|
||||
&- tmp11 * ((-(c3c4)) * t2 * u_(0,3))
|
||||
lhs_(3,2,1) = 0.
|
||||
lhs_(3,3,1) = (-(tmp22)) * u_(0,4) * t1 - tmp11 * c3c4 *
|
||||
&t1 - tmp11 * dz3
|
||||
lhs_(3,4,1) = (-(tmp22)) * u_(0,3) * t1
|
||||
lhs_(3,5,1) = 0.
|
||||
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,4) * u_(0,4) * t2)) +
|
||||
& 0.50d+00 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
|
||||
& * u_(0,4)) * t2)) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,4))
|
||||
lhs_(4,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * t1)
|
||||
lhs_(4,3,1) = (-(tmp22)) * ((-(c2)) * u_(0,3) * t1)
|
||||
lhs_(4,4,1) = (-(tmp22)) * (2.0d+00 - c2) * u_(0,4) * t1
|
||||
&- tmp11 * con43 * c3 * c4 * t1 - tmp11 * dz4
|
||||
lhs_(4,5,1) = (-(tmp22)) * c2
|
||||
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
|
||||
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * (u_(0,5) * t1)) *
|
||||
& (u_(0,4) * t1)) - tmp11 * ((-(c3c4 - c1345)) * t3 * u_(0,2)** 2 -
|
||||
& (c3c4 - c1345) * t3 * u_(0,3)** 2 - (con43 * c3c4 - c1345) * t3 *
|
||||
& u_(0,4)** 2 - c1345 * t2 * u_(0,5))
|
||||
lhs_(5,2,1) = (-(tmp22)) * ((-(c2)) * (u_(0,2) * u_(0,4))
|
||||
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,2)
|
||||
lhs_(5,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,4))
|
||||
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,3)
|
||||
lhs_(5,4,1) = (-(tmp22)) * (c1 * (u_(0,5) * t1) - 0.50d+0
|
||||
&0 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + 3.0d+00 * u_(0,
|
||||
&4) * u_(0,4)) * t2)) - tmp11 * (con43 * c3c4 - c1345) * t2 * u_(0,
|
||||
&4)
|
||||
lhs_(5,5,1) = (-(tmp22)) * c1 * u_(0,4) * t1 - tmp11 * c1
|
||||
&345 * t1 - tmp11 * dz5
|
||||
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dz1
|
||||
lhs_(1,2,2) = 0.
|
||||
lhs_(1,3,2) = 0.
|
||||
lhs_(1,4,2) = 0.
|
||||
lhs_(1,5,2) = 0.
|
||||
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&2))
|
||||
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dz2
|
||||
lhs_(2,3,2) = 0.
|
||||
lhs_(2,4,2) = 0.
|
||||
lhs_(2,5,2) = 0.
|
||||
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
|
||||
&3))
|
||||
lhs_(3,2,2) = 0.
|
||||
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
|
||||
&mp11 * 2.0d+00 * dz3
|
||||
lhs_(3,4,2) = 0.
|
||||
lhs_(3,5,2) = 0.
|
||||
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
|
||||
& * u_(1,4))
|
||||
lhs_(4,2,2) = 0.
|
||||
lhs_(4,3,2) = 0.
|
||||
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3 * c4
|
||||
& * tmp1 + tmp11 * 2.0d+00 * dz4
|
||||
lhs_(4,5,2) = 0.
|
||||
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(c3c4 - c1345)) * tmp3
|
||||
& * u_(1,2)** 2 - (c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (con43 * c3
|
||||
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
|
||||
lhs_(5,2,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,2)
|
||||
lhs_(5,3,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
|
||||
&_(1,3)
|
||||
lhs_(5,4,2) = tmp11 * 2.0d+00 * (con43 * c3c4 - c1345) *
|
||||
&tmp2 * u_(1,4)
|
||||
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
|
||||
&tmp11 * 2.0d+00 * dz5
|
||||
if (k .ne. 1) then
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
|
||||
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
|
||||
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
|
||||
&,3)
|
||||
enddo
|
||||
endif
|
||||
lhs_(1,1,3) = (-(tmp11)) * dz1
|
||||
lhs_(1,2,3) = 0.
|
||||
lhs_(1,3,3) = 0.
|
||||
lhs_(1,4,3) = tmp22
|
||||
lhs_(1,5,3) = 0.
|
||||
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * u_(2,4))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,2))
|
||||
lhs_(2,2,3) = tmp22 * u_(2,4) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dz2
|
||||
lhs_(2,3,3) = 0.
|
||||
lhs_(2,4,3) = tmp22 * u_(2,2) * tm1
|
||||
lhs_(2,5,3) = 0.
|
||||
lhs_(3,1,3) = tmp22 * ((-(u_(2,3) * u_(2,4))) * tm2) - tm
|
||||
&p11 * ((-(c3c4)) * tm2 * u_(2,3))
|
||||
lhs_(3,2,3) = 0.
|
||||
lhs_(3,3,3) = tmp22 * u_(2,4) * tm1 - tmp11 * c3c4 * tm1
|
||||
&- tmp11 * dz3
|
||||
lhs_(3,4,3) = tmp22 * u_(2,3) * tm1
|
||||
lhs_(3,5,3) = 0.
|
||||
lhs_(4,1,3) = tmp22 * ((-(u_(2,4) * u_(2,4) * tm2)) + 0.5
|
||||
&0d+00 * c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u
|
||||
&_(2,4)) * tm2)) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,4))
|
||||
lhs_(4,2,3) = tmp22 * ((-(c2)) * u_(2,2) * tm1)
|
||||
lhs_(4,3,3) = tmp22 * ((-(c2)) * u_(2,3) * tm1)
|
||||
lhs_(4,4,3) = tmp22 * (2.0d+00 - c2) * u_(2,4) * tm1 - tm
|
||||
&p11 * con43 * c3 * c4 * tm1 - tmp11 * dz4
|
||||
lhs_(4,5,3) = tmp22 * c2
|
||||
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
|
||||
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * (u_(2,5) * tm1)) * (u
|
||||
&_(2,4) * tm1)) - tmp11 * ((-(c3c4 - c1345)) * tm3 * u_(2,2)** 2 -
|
||||
&(c3c4 - c1345) * tm3 * u_(2,3)** 2 - (con43 * c3c4 - c1345) * tm3
|
||||
&* u_(2,4)** 2 - c1345 * tm2 * u_(2,5))
|
||||
lhs_(5,2,3) = tmp22 * ((-(c2)) * (u_(2,2) * u_(2,4)) * tm
|
||||
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,2)
|
||||
lhs_(5,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,4)) * tm
|
||||
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,3)
|
||||
lhs_(5,4,3) = tmp22 * (c1 * (u_(2,5) * tm1) - 0.50d+00 *
|
||||
&c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + 3.0d+00 * u_(2,4) *
|
||||
& u_(2,4)) * tm2)) - tmp11 * (con43 * c3c4 - c1345) * tm2 * u_(2,4)
|
||||
lhs_(5,5,3) = tmp22 * c1 * u_(2,4) * tm1 - tmp11 * c1345
|
||||
&* tm1 - tmp11 * dz5
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j,k
|
||||
& - 1) - lhs_(i__0,2,1) * rhs(2,i,j,k - 1) - lhs_(i__0,3,1) * rhs(3
|
||||
&,i,j,k - 1) - lhs_(i__0,4,1) * rhs(4,i,j,k - 1) - lhs_(i__0,5,1) *
|
||||
& rhs(5,i,j,k - 1)
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
pivot = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot
|
||||
lhs_(1,1,3) = lhs_(1,1,3) * pivot
|
||||
lhs_(1,2,3) = lhs_(1,2,3) * pivot
|
||||
lhs_(1,3,3) = lhs_(1,3,3) * pivot
|
||||
lhs_(1,4,3) = lhs_(1,4,3) * pivot
|
||||
lhs_(1,5,3) = lhs_(1,5,3) * pivot
|
||||
rhs_(1) = rhs_(1) * pivot
|
||||
coeff = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(1)
|
||||
coeff = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(1)
|
||||
coeff = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(1)
|
||||
coeff = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(1)
|
||||
pivot = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot
|
||||
lhs_(2,1,3) = lhs_(2,1,3) * pivot
|
||||
lhs_(2,2,3) = lhs_(2,2,3) * pivot
|
||||
lhs_(2,3,3) = lhs_(2,3,3) * pivot
|
||||
lhs_(2,4,3) = lhs_(2,4,3) * pivot
|
||||
lhs_(2,5,3) = lhs_(2,5,3) * pivot
|
||||
rhs_(2) = rhs_(2) * pivot
|
||||
coeff = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(2)
|
||||
coeff = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(2)
|
||||
coeff = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(2)
|
||||
coeff = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(2)
|
||||
pivot = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot
|
||||
lhs_(3,1,3) = lhs_(3,1,3) * pivot
|
||||
lhs_(3,2,3) = lhs_(3,2,3) * pivot
|
||||
lhs_(3,3,3) = lhs_(3,3,3) * pivot
|
||||
lhs_(3,4,3) = lhs_(3,4,3) * pivot
|
||||
lhs_(3,5,3) = lhs_(3,5,3) * pivot
|
||||
rhs_(3) = rhs_(3) * pivot
|
||||
coeff = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(3)
|
||||
coeff = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(3)
|
||||
coeff = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(3)
|
||||
coeff = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(3)
|
||||
pivot = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot
|
||||
lhs_(4,1,3) = lhs_(4,1,3) * pivot
|
||||
lhs_(4,2,3) = lhs_(4,2,3) * pivot
|
||||
lhs_(4,3,3) = lhs_(4,3,3) * pivot
|
||||
lhs_(4,4,3) = lhs_(4,4,3) * pivot
|
||||
lhs_(4,5,3) = lhs_(4,5,3) * pivot
|
||||
rhs_(4) = rhs_(4) * pivot
|
||||
coeff = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(4)
|
||||
coeff = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(4)
|
||||
coeff = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(4)
|
||||
coeff = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
|
||||
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
|
||||
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
|
||||
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
|
||||
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
|
||||
rhs_(5) = rhs_(5) - coeff * rhs_(4)
|
||||
pivot = 1.00d0 / lhs_(5,5,2)
|
||||
lhs_(5,1,3) = lhs_(5,1,3) * pivot
|
||||
lhs_(5,2,3) = lhs_(5,2,3) * pivot
|
||||
lhs_(5,3,3) = lhs_(5,3,3) * pivot
|
||||
lhs_(5,4,3) = lhs_(5,4,3) * pivot
|
||||
lhs_(5,5,3) = lhs_(5,5,3) * pivot
|
||||
rhs_(5) = rhs_(5) * pivot
|
||||
coeff = lhs_(1,5,2)
|
||||
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(1) = rhs_(1) - coeff * rhs_(5)
|
||||
coeff = lhs_(2,5,2)
|
||||
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(2) = rhs_(2) - coeff * rhs_(5)
|
||||
coeff = lhs_(3,5,2)
|
||||
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(3) = rhs_(3) - coeff * rhs_(5)
|
||||
coeff = lhs_(4,5,2)
|
||||
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
|
||||
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
|
||||
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
|
||||
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
|
||||
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
|
||||
rhs_(4) = rhs_(4) - coeff * rhs_(5)
|
||||
do i__0 = 1,5
|
||||
lhs__(i__0,1,i,j,k) = lhs_(i__0,1,3)
|
||||
lhs__(i__0,2,i,j,k) = lhs_(i__0,2,3)
|
||||
lhs__(i__0,3,i,j,k) = lhs_(i__0,3,3)
|
||||
lhs__(i__0,4,i,j,k) = lhs_(i__0,4,3)
|
||||
lhs__(i__0,5,i,j,k) = lhs_(i__0,5,3)
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
|
||||
|
||||
else !! of big IF(k .ne. lastIter)
|
||||
|
||||
do n = 1,5
|
||||
lhs_(1,n,1) = 0.0d0
|
||||
lhs_(1,n,2) = 0.0d0
|
||||
lhs_(1,n,3) = 0.0d0
|
||||
lhs_(2,n,1) = 0.0d0
|
||||
lhs_(2,n,2) = 0.0d0
|
||||
lhs_(2,n,3) = 0.0d0
|
||||
lhs_(3,n,1) = 0.0d0
|
||||
lhs_(3,n,2) = 0.0d0
|
||||
lhs_(3,n,3) = 0.0d0
|
||||
lhs_(4,n,1) = 0.0d0
|
||||
lhs_(4,n,2) = 0.0d0
|
||||
lhs_(4,n,3) = 0.0d0
|
||||
lhs_(5,n,1) = 0.0d0
|
||||
lhs_(5,n,2) = 0.0d0
|
||||
lhs_(5,n,3) = 0.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
lhs_(m,m,2) = 1.0d0
|
||||
enddo
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
enddo
|
||||
do i__0 = 1,5
|
||||
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j,ksiz
|
||||
&e - 1) - lhs_(i__0,2,1) * rhs(2,i,j,ksize - 1) - lhs_(i__0,3,1) *
|
||||
&rhs(3,i,j,ksize - 1) - lhs_(i__0,4,1) * rhs(4,i,j,ksize - 1) - lhs
|
||||
&_(i__0,5,1) * rhs(5,i,j,ksize - 1)
|
||||
enddo
|
||||
do j__1 = 1,5
|
||||
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
|
||||
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
|
||||
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
|
||||
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
pivot__3 = 1.00d0 / lhs_(1,1,2)
|
||||
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
|
||||
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
|
||||
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
|
||||
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
|
||||
rhs_(1) = rhs_(1) * pivot__3
|
||||
coeff__2 = lhs_(2,1,2)
|
||||
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(3,1,2)
|
||||
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(4,1,2)
|
||||
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
|
||||
coeff__2 = lhs_(5,1,2)
|
||||
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
|
||||
pivot__3 = 1.00d0 / lhs_(2,2,2)
|
||||
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
|
||||
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
|
||||
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
|
||||
rhs_(2) = rhs_(2) * pivot__3
|
||||
coeff__2 = lhs_(1,2,2)
|
||||
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(3,2,2)
|
||||
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(4,2,2)
|
||||
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
|
||||
coeff__2 = lhs_(5,2,2)
|
||||
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
|
||||
pivot__3 = 1.00d0 / lhs_(3,3,2)
|
||||
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
|
||||
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
|
||||
rhs_(3) = rhs_(3) * pivot__3
|
||||
coeff__2 = lhs_(1,3,2)
|
||||
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(2,3,2)
|
||||
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(4,3,2)
|
||||
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
|
||||
coeff__2 = lhs_(5,3,2)
|
||||
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
|
||||
pivot__3 = 1.00d0 / lhs_(4,4,2)
|
||||
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
|
||||
rhs_(4) = rhs_(4) * pivot__3
|
||||
coeff__2 = lhs_(1,4,2)
|
||||
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(2,4,2)
|
||||
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(3,4,2)
|
||||
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
|
||||
coeff__2 = lhs_(5,4,2)
|
||||
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
|
||||
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
|
||||
pivot__3 = 1.00d0 / lhs_(5,5,2)
|
||||
rhs_(5) = rhs_(5) * pivot__3
|
||||
coeff__2 = lhs_(1,5,2)
|
||||
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(2,5,2)
|
||||
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(3,5,2)
|
||||
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
|
||||
coeff__2 = lhs_(4,5,2)
|
||||
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
enddo
|
||||
endif
|
||||
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), private(rhs_,rhsp_,m)
|
||||
!DVM$& ,ACROSS(rhs(0:0,0:0,0:0,0:1)),stage(stage_n)
|
||||
do k = problem_size-2, 1, (-(1))
|
||||
do j = 1,problem_size - 2
|
||||
do i = 1,problem_size - 2
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs(m,i,j,k)
|
||||
rhsp_(m) = rhs(m,i,j,k + 1)
|
||||
enddo
|
||||
|
||||
do m = 1,5
|
||||
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k) * rhsp_(1)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k) * rhsp_(2)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k) * rhsp_(3)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k) * rhsp_(4)
|
||||
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k) * rhsp_(5)
|
||||
enddo
|
||||
|
||||
do m = 1,5
|
||||
rhs(m,i,j,k) = rhs_(m)
|
||||
u(m,i,j,k) = u(m,i,j,k) + rhs_(m)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ end region
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,21 @@
|
||||
SHELL=/bin/sh
|
||||
BENCHMARK=cg
|
||||
BENCHMARKU=CG
|
||||
|
||||
include ../config/make.def
|
||||
include ../sys/make.common
|
||||
|
||||
SOURCES = cg.fdv
|
||||
|
||||
OBJS = ${SOURCES:.fdv=.o}
|
||||
|
||||
${PROGRAM}: config $(OBJS)
|
||||
${FLINK} -o ${PROGRAM} ${OBJS}
|
||||
|
||||
%.o: %.fdv npbparams.h globals.h
|
||||
${F77} ${FFLAGS} -dvmIrregAnalysis -c -o $@ $<
|
||||
|
||||
clean:
|
||||
rm -f npbparams.h
|
||||
rm -f *.o *~
|
||||
rm -f *.cu *.cuf *.c
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,105 @@
|
||||
include 'npbparams.h'
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Note: please observe that in the routine conj_grad three
|
||||
c implementations of the sparse matrix-vector multiply have
|
||||
c been supplied. The default matrix-vector multiply is not
|
||||
c loop unrolled. The alternate implementations are unrolled
|
||||
c to a depth of 2 and unrolled to a depth of 8. Please
|
||||
c experiment with these to find the fastest for your particular
|
||||
c architecture. If reporting timing results, any of these three may
|
||||
c be used without penalty.
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Class specific parameters:
|
||||
c It appears here for reference only.
|
||||
c These are their values, however, this info is imported in the npbparams.h
|
||||
c include file, which is written by the sys/setparams.c program.
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
C----------
|
||||
C Class S:
|
||||
C----------
|
||||
CC parameter( na=1400,
|
||||
CC > nonzer=7,
|
||||
CC > shift=10.,
|
||||
CC > niter=15,
|
||||
CC > rcond=1.0d-1 )
|
||||
C----------
|
||||
C Class W:
|
||||
C----------
|
||||
CC parameter( na=7000,
|
||||
CC > nonzer=8,
|
||||
CC > shift=12.,
|
||||
CC > niter=15,
|
||||
CC > rcond=1.0d-1 )
|
||||
C----------
|
||||
C Class A:
|
||||
C----------
|
||||
CC parameter( na=14000,
|
||||
CC > nonzer=11,
|
||||
CC > shift=20.,
|
||||
CC > niter=15,
|
||||
CC > rcond=1.0d-1 )
|
||||
C----------
|
||||
C Class B:
|
||||
C----------
|
||||
CC parameter( na=75000,
|
||||
CC > nonzer=13,
|
||||
CC > shift=60.,
|
||||
CC > niter=75,
|
||||
CC > rcond=1.0d-1 )
|
||||
C----------
|
||||
C Class C:
|
||||
C----------
|
||||
CC parameter( na=150000,
|
||||
CC > nonzer=15,
|
||||
CC > shift=110.,
|
||||
CC > niter=75,
|
||||
CC > rcond=1.0d-1 )
|
||||
C----------
|
||||
C Class D:
|
||||
C----------
|
||||
CC parameter( na=1500000,
|
||||
CC > nonzer=21,
|
||||
CC > shift=500.,
|
||||
CC > niter=100,
|
||||
CC > rcond=1.0d-1 )
|
||||
C----------
|
||||
C Class E:
|
||||
C----------
|
||||
CC parameter( na=9000000,
|
||||
CC > nonzer=26,
|
||||
CC > shift=1500.,
|
||||
CC > niter=100,
|
||||
CC > rcond=1.0d-1 )
|
||||
|
||||
|
||||
integer nz, naz
|
||||
parameter( nz = na*(nonzer+1)*(nonzer+1) )
|
||||
parameter( naz = na*(nonzer+1) )
|
||||
|
||||
|
||||
common / partit_size / naa, nzz,
|
||||
> firstrow,
|
||||
> lastrow,
|
||||
> firstcol,
|
||||
> lastcol
|
||||
integer naa, nzz,
|
||||
> firstrow,
|
||||
> lastrow,
|
||||
> firstcol,
|
||||
> lastcol
|
||||
|
||||
common /urando/ amult, tran
|
||||
double precision amult, tran
|
||||
|
||||
external timer_read
|
||||
double precision timer_read
|
||||
|
||||
integer T_init, T_bench, T_conj_grad, T_last
|
||||
parameter (T_init=1, T_bench=2, T_conj_grad=3, T_last=3)
|
||||
logical timeron
|
||||
common /timers/ timeron
|
||||
@@ -0,0 +1,12 @@
|
||||
@echo off
|
||||
|
||||
set CLASS=%1
|
||||
set OPT=%2
|
||||
|
||||
CALL ..\sys\setparams CG %CLASS%
|
||||
CALL %F77% %OPT% cg 1>out_%CLASS%.txt 2>err_%CLASS%.txt
|
||||
if exist cg.exe (
|
||||
copy cg.exe %BIN%\cg.%CLASS%.x.exe
|
||||
del cg.exe
|
||||
)
|
||||
|
||||
@@ -0,0 +1,111 @@
|
||||
|
||||
subroutine print_results(name, class, n1, n2, n3, niter,
|
||||
> t, mops, optype, verified, npbversion,
|
||||
> compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7)
|
||||
|
||||
implicit none
|
||||
character name*(*)
|
||||
character class*1
|
||||
integer n1, n2, n3, niter, j
|
||||
double precision t, mops
|
||||
character optype*24, size*15
|
||||
logical verified
|
||||
character*(*) npbversion, compiletime,
|
||||
> cs1, cs2, cs3, cs4, cs5, cs6, cs7
|
||||
|
||||
write (*, 2) name
|
||||
2 format(//, ' ', A, ' Benchmark Completed.')
|
||||
|
||||
write (*, 3) Class
|
||||
3 format(' Class = ', 12x, a12)
|
||||
|
||||
c If this is not a grid-based problem (EP, FT, CG), then
|
||||
c we only print n1, which contains some measure of the
|
||||
c problem size. In that case, n2 and n3 are both zero.
|
||||
c Otherwise, we print the grid size n1xn2xn3
|
||||
|
||||
if ((n2 .eq. 0) .and. (n3 .eq. 0)) then
|
||||
if (name(1:2) .eq. 'EP') then
|
||||
write(size, '(f15.0)' ) 2.d0**n1
|
||||
j = 15
|
||||
if (size(j:j) .eq. '.') then
|
||||
size(j:j) = ' '
|
||||
j = j - 1
|
||||
endif
|
||||
write (*,42) size(1:j)
|
||||
42 format(' Size = ',9x, a15)
|
||||
else
|
||||
write (*,44) n1
|
||||
44 format(' Size = ',12x, i12)
|
||||
endif
|
||||
else
|
||||
write (*, 4) n1,n2,n3
|
||||
4 format(' Size = ',9x, i4,'x',i4,'x',i4)
|
||||
endif
|
||||
|
||||
write (*, 5) niter
|
||||
5 format(' Iterations = ', 12x, i12)
|
||||
|
||||
write (*, 6) t
|
||||
6 format(' Time in seconds = ',12x, f12.2)
|
||||
|
||||
write (*,9) mops
|
||||
9 format(' Mop/s total = ',12x, f12.2)
|
||||
|
||||
write(*, 11) optype
|
||||
11 format(' Operation type = ', a24)
|
||||
|
||||
if (verified) then
|
||||
write(*,12) ' SUCCESSFUL'
|
||||
else
|
||||
write(*,12) 'UNSUCCESSFUL'
|
||||
endif
|
||||
12 format(' Verification = ', 12x, a)
|
||||
|
||||
write(*,13) npbversion
|
||||
13 format(' Version = ', 12x, a12)
|
||||
|
||||
write(*,14) compiletime
|
||||
14 format(' Compile date = ', 12x, a12)
|
||||
|
||||
|
||||
write (*,121) cs1
|
||||
121 format(/, ' Compile options:', /,
|
||||
> ' F77 = ', A)
|
||||
|
||||
write (*,122) cs2
|
||||
122 format(' FLINK = ', A)
|
||||
|
||||
write (*,123) cs3
|
||||
123 format(' F_LIB = ', A)
|
||||
|
||||
write (*,124) cs4
|
||||
124 format(' F_INC = ', A)
|
||||
|
||||
write (*,125) cs5
|
||||
125 format(' FFLAGS = ', A)
|
||||
|
||||
write (*,126) cs6
|
||||
126 format(' FLINKFLAGS = ', A)
|
||||
|
||||
write(*, 127) cs7
|
||||
127 format(' RAND = ', A)
|
||||
|
||||
write (*,130)
|
||||
130 format(//' Please send all errors/feedbacks to:'//
|
||||
> ' NPB Development Team'/
|
||||
> ' npb@nas.nasa.gov'//)
|
||||
c 130 format(//' Please send the results of this run to:'//
|
||||
c > ' NPB Development Team '/
|
||||
c > ' Internet: npb@nas.nasa.gov'/
|
||||
c > ' '/
|
||||
c > ' If email is not available, send this to:'//
|
||||
c > ' MS T27A-1'/
|
||||
c > ' NASA Ames Research Center'/
|
||||
c > ' Moffett Field, CA 94035-1000'//
|
||||
c > ' Fax: 650-604-3957'//)
|
||||
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,137 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
double precision function randlc (x, a)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c
|
||||
c This routine returns a uniform pseudorandom double precision number in the
|
||||
c range (0, 1) by using the linear congruential generator
|
||||
c
|
||||
c x_{k+1} = a x_k (mod 2^46)
|
||||
c
|
||||
c where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers
|
||||
c before repeating. The argument A is the same as 'a' in the above formula,
|
||||
c and X is the same as x_0. A and X must be odd double precision integers
|
||||
c in the range (1, 2^46). The returned value RANDLC is normalized to be
|
||||
c between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain
|
||||
c the new seed x_1, so that subsequent calls to RANDLC using the same
|
||||
c arguments will generate a continuous sequence.
|
||||
c
|
||||
c This routine should produce the same results on any computer with at least
|
||||
c 48 mantissa bits in double precision floating point data. On 64 bit
|
||||
c systems, double precision should be disabled.
|
||||
c
|
||||
c David H. Bailey October 26, 1990
|
||||
c
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
|
||||
double precision r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z
|
||||
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
|
||||
> t46 = t23 ** 2)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Break A into two parts such that A = 2^23 * A1 + A2.
|
||||
c---------------------------------------------------------------------
|
||||
t1 = r23 * a
|
||||
a1 = int (t1)
|
||||
a2 = a - t23 * a1
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Break X into two parts such that X = 2^23 * X1 + X2, compute
|
||||
c Z = A1 * X2 + A2 * X1 (mod 2^23), and then
|
||||
c X = 2^23 * Z + A2 * X2 (mod 2^46).
|
||||
c---------------------------------------------------------------------
|
||||
t1 = r23 * x
|
||||
x1 = int (t1)
|
||||
x2 = x - t23 * x1
|
||||
t1 = a1 * x2 + a2 * x1
|
||||
t2 = int (r23 * t1)
|
||||
z = t1 - t23 * t2
|
||||
t3 = t23 * z + a2 * x2
|
||||
t4 = int (r46 * t3)
|
||||
x = t3 - t46 * t4
|
||||
randlc = r46 * x
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine vranlc (n, x, a, y)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c
|
||||
c This routine generates N uniform pseudorandom double precision numbers in
|
||||
c the range (0, 1) by using the linear congruential generator
|
||||
c
|
||||
c x_{k+1} = a x_k (mod 2^46)
|
||||
c
|
||||
c where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers
|
||||
c before repeating. The argument A is the same as 'a' in the above formula,
|
||||
c and X is the same as x_0. A and X must be odd double precision integers
|
||||
c in the range (1, 2^46). The N results are placed in Y and are normalized
|
||||
c to be between 0 and 1. X is updated to contain the new seed, so that
|
||||
c subsequent calls to VRANLC using the same arguments will generate a
|
||||
c continuous sequence. If N is zero, only initialization is performed, and
|
||||
c the variables X, A and Y are ignored.
|
||||
c
|
||||
c This routine is the standard version designed for scalar or RISC systems.
|
||||
c However, it should produce the same results on any single processor
|
||||
c computer with at least 48 mantissa bits in double precision floating point
|
||||
c data. On 64 bit systems, double precision should be disabled.
|
||||
c
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
|
||||
integer i,n
|
||||
double precision y,r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z
|
||||
dimension y(*)
|
||||
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
|
||||
> t46 = t23 ** 2)
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Break A into two parts such that A = 2^23 * A1 + A2.
|
||||
c---------------------------------------------------------------------
|
||||
t1 = r23 * a
|
||||
a1 = int (t1)
|
||||
a2 = a - t23 * a1
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Generate N results. This loop is not vectorizable.
|
||||
c---------------------------------------------------------------------
|
||||
do i = 1, n
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Break X into two parts such that X = 2^23 * X1 + X2, compute
|
||||
c Z = A1 * X2 + A2 * X1 (mod 2^23), and then
|
||||
c X = 2^23 * Z + A2 * X2 (mod 2^46).
|
||||
c---------------------------------------------------------------------
|
||||
t1 = r23 * x
|
||||
x1 = int (t1)
|
||||
x2 = x - t23 * x1
|
||||
t1 = a1 * x2 + a2 * x1
|
||||
t2 = int (r23 * t1)
|
||||
z = t1 - t23 * t2
|
||||
t3 = t23 * z + a2 * x2
|
||||
t4 = int (r46 * t3)
|
||||
x = t3 - t46 * t4
|
||||
y(i) = r46 * x
|
||||
enddo
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,108 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine timer_clear(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
integer n
|
||||
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
|
||||
elapsed(n) = 0.0
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine timer_start(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
external elapsed_time
|
||||
double precision elapsed_time
|
||||
integer n
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
|
||||
start(n) = elapsed_time()
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine timer_stop(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
external elapsed_time
|
||||
double precision elapsed_time
|
||||
integer n
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
double precision t, now
|
||||
now = elapsed_time()
|
||||
t = now - start(n)
|
||||
elapsed(n) = elapsed(n) + t
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
double precision function timer_read(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
integer n
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
|
||||
timer_read = elapsed(n)
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
double precision function elapsed_time()
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
|
||||
double precision dvtime
|
||||
|
||||
c This function must measure wall clock time, not CPU time.
|
||||
c Since there is no portable timer in Fortran (77)
|
||||
c we call a routine compiled in C (though the C source may have
|
||||
c to be tweaked).
|
||||
! call wtime(t)
|
||||
c The following is not ok for "official" results because it reports
|
||||
c CPU time not wall clock time. It may be useful for developing/testing
|
||||
c on timeshared Crays, though.
|
||||
c call second(t)
|
||||
|
||||
elapsed_time = dvtime()
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
SHELL=/bin/sh
|
||||
BENCHMARK=ep
|
||||
BENCHMARKU=EP
|
||||
|
||||
include ../config/make.def
|
||||
include ../sys/make.common
|
||||
|
||||
SOURCES = ep.fdv
|
||||
|
||||
OBJS = ${SOURCES:.fdv=.o}
|
||||
|
||||
${PROGRAM}: config $(OBJS)
|
||||
${FLINK} -o ${PROGRAM} ${OBJS}
|
||||
|
||||
%.o: %.fdv npbparams.h
|
||||
${F77} ${FFLAGS} -c -o $@ $<
|
||||
|
||||
clean:
|
||||
rm -f npbparams.h
|
||||
rm -f *.o *~
|
||||
rm -f *.cu *.cuf *.c *.f
|
||||
@@ -0,0 +1,565 @@
|
||||
!-------------------------------------------------------------------------!
|
||||
! !
|
||||
! N A S P A R A L L E L B E N C H M A R K S 2.3 !
|
||||
! !
|
||||
! D V M V E R S I O N S !
|
||||
! !
|
||||
! E P !
|
||||
! !
|
||||
!-------------------------------------------------------------------------!
|
||||
! !
|
||||
! This benchmark is DVM version of the NPB EP code. !
|
||||
! !
|
||||
! Permission to use, copy, distribute and modify this software !
|
||||
! for any purpose with or without fee is hereby granted. We !
|
||||
! request, however, that all derived work reference the NAS !
|
||||
! Parallel Benchmarks 2.3. This software is provided "as is" !
|
||||
! without express or implied warranty. !
|
||||
! !
|
||||
! Information on NPB 2.3, including the technical report, the !
|
||||
! original specifications, source code, results and information !
|
||||
! on how to submit new results, is available at: !
|
||||
! !
|
||||
! http://www.nas.nasa.gov/NAS/NPB/ !
|
||||
! !
|
||||
! Send comments or suggestions to npb@nas.nasa.gov !
|
||||
! Send bug reports to npb-bugs@nas.nasa.gov !
|
||||
! !
|
||||
! NAS Parallel Benchmarks Group !
|
||||
! NASA Ames Research Center !
|
||||
! Mail Stop: T27A-1 !
|
||||
! Moffett Field, CA 94035-1000 !
|
||||
! !
|
||||
! E-mail: npb@nas.nasa.gov !
|
||||
! Fax: (415) 604-3957 !
|
||||
! !
|
||||
!-------------------------------------------------------------------------!
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c
|
||||
c Author: P. O. Frederickson
|
||||
c D. H. Bailey
|
||||
c A. C. Woo
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
program epdv
|
||||
c---------------------------------------------------------------------
|
||||
C
|
||||
c This is the serial version of the APP Benchmark 1,
|
||||
c the "embarassingly parallel" benchmark.
|
||||
c
|
||||
c
|
||||
c M is the Log_2 of the number of complex pairs of uniform (0, 1) random
|
||||
c numbers. MK is the Log_2 of the size of each batch of uniform random
|
||||
c numbers. MK can be set for convenience on a given system, since it does
|
||||
c not affect the results.
|
||||
|
||||
implicit none
|
||||
|
||||
include 'npbparams.h'
|
||||
|
||||
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
|
||||
|
||||
double precision y,r23,r46,t23,t46,a1,a2,z,ah
|
||||
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
|
||||
> t46 = t23 ** 2)
|
||||
|
||||
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
|
||||
|
||||
double precision Mops, epsilon, a, s, t1, t2, t3, t4, x, x1,
|
||||
> x2, q, sx, sy, tm, an, tt, gc, dum(3),
|
||||
> timer_read
|
||||
integer mk, mm, nn, nk, nq, np, ierr, node, no_nodes,
|
||||
> i, ik, kk, l, k, nit, ierrcode, no_large_nodes,
|
||||
> np_add, k_offset, j
|
||||
logical verified, timers_enabled
|
||||
parameter (timers_enabled = .false.)
|
||||
external timer_read
|
||||
double precision qq, t1h, t2h, y1, y2, xh
|
||||
character*13 size
|
||||
|
||||
parameter (mk = 16, mm = m - mk, nn = 2 ** mm,
|
||||
> nk = 2 ** mk, nq = 10, epsilon=1.d-8,
|
||||
> a = 1220703125.d0, s = 271828183.d0)
|
||||
|
||||
c common/storage/ x(2*nk), q(0:nq-1), qq(10000)
|
||||
common/storage/ x(2*nk), q(0:9), qq(10000)
|
||||
|
||||
!DVM$ TEMPLATE TEM(nn)
|
||||
!DVM$ DISTRIBUTE TEM (BLOCK)
|
||||
data dum /1.d0, 1.d0, 1.d0/
|
||||
|
||||
|
||||
c Because the size of the problem is too large to store in a 32-bit
|
||||
c integer for some classes, we put it into a string (for printing).
|
||||
c Have to strip off the decimal point put in there by the floating
|
||||
c point print statement (internal file)
|
||||
|
||||
write(*, 1000)
|
||||
write(size, '(f12.0)' ) 2.d0**(m+1)
|
||||
do j =13,1,-1
|
||||
if (size(j:j) .eq. '.') size(j:j) = ' '
|
||||
end do
|
||||
write (*,1001) size
|
||||
|
||||
1000 format(//,' NAS Parallel Benchmarks 3.3 - DVMH version',
|
||||
> ' - EP Benchmark', /)
|
||||
1001 format(' Number of random numbers generated: ', a14)
|
||||
1003 format(' Number of active processes: ', i12, /)
|
||||
|
||||
verified = .false.
|
||||
|
||||
c Compute the number of "batches" of random number pairs generated
|
||||
c per processor. Adjust if the number of processors does not evenly
|
||||
c divide the total number
|
||||
|
||||
np = nn
|
||||
|
||||
|
||||
c Call the random number generator functions and initialize
|
||||
c the x-array to reduce the effects of paging on the timings.
|
||||
c Also, call all mathematical functions that are used. Make
|
||||
c sure these initializations cannot be eliminated as dead code.
|
||||
|
||||
call vranlc(0, dum(1), dum(2), dum(3))
|
||||
call randlc(dum(2), dum(3), dum(1))
|
||||
do 5 i = 1, 2*nk
|
||||
x(i) = -1.d99
|
||||
5 continue
|
||||
Mops = log(sqrt(abs(max(1.d0,1.d0))))
|
||||
|
||||
|
||||
call timer_clear(1)
|
||||
call timer_clear(2)
|
||||
call timer_clear(3)
|
||||
call timer_start(1)
|
||||
!DVM$ INTERVAL 1
|
||||
call vranlc(0, t1, a, x)
|
||||
|
||||
c Compute AN = A ^ (2 * NK) (mod 2^46).
|
||||
|
||||
t1 = a
|
||||
|
||||
do 100 i = 1, mk + 1
|
||||
call randlc(t1, t1, t2)
|
||||
100 continue
|
||||
|
||||
an = t1
|
||||
tt = s
|
||||
gc = 0.d0
|
||||
sx = 0.d0
|
||||
sy = 0.d0
|
||||
|
||||
do 110 i = 0, nq - 1
|
||||
q(i) = 0.d0
|
||||
110 continue
|
||||
|
||||
c Each instance of this loop may be performed independently. We compute
|
||||
c the k offsets separately to take into account the fact that some nodes
|
||||
c have more numbers to generate than others
|
||||
|
||||
k_offset = -1
|
||||
|
||||
!DVM$ region
|
||||
!DVM$ PARALLEL (k) ON TEM(k),REDUCTION(SUM(q),SUM(sx),SUM(sy))
|
||||
!DVM$*,private(xh,i,kk,ik,t1,t2,y1,y2,a1,a2,x1,x2
|
||||
!DVM$*,l,t3,t4),cuda_block(256)
|
||||
do k = 1, np
|
||||
kk = k_offset + k
|
||||
t1 = s
|
||||
t2 = an
|
||||
|
||||
c Find starting seed t1 for this kk.
|
||||
do i = 1, 100
|
||||
ik = kk / 2
|
||||
if (2 * ik .ne. kk) then
|
||||
call randlc(t1, t2, t3)
|
||||
endif
|
||||
if (ik .eq. 0) exit
|
||||
call randlc(t2, t2, t3)
|
||||
kk = ik
|
||||
enddo
|
||||
|
||||
|
||||
xh = t1
|
||||
a1 = int (r23 * a)
|
||||
a2 = a - t23 * a1
|
||||
|
||||
do i = 1, nk
|
||||
call randNext (xh, y1, a1, a2)
|
||||
call randNext (xh, y2, a1, a2)
|
||||
|
||||
x1 = 2.d0 * y1 - 1.d0
|
||||
x2 = 2.d0 * y2 - 1.d0
|
||||
t1 = x1 * x1 + x2 * x2
|
||||
if (t1 .le. 1.d0) then
|
||||
t2 = sqrt(-2.d0 * log(t1) / t1)
|
||||
t3 = (x1 * t2)
|
||||
t4 = (x2 * t2)
|
||||
l = max(abs(t3), abs(t4))
|
||||
q(l) = q(l) + 1.d0
|
||||
sx = sx + t3
|
||||
sy = sy + t4
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ end region
|
||||
|
||||
do 160 i = 0, nq - 1
|
||||
gc = gc + q(i)
|
||||
160 continue
|
||||
!DVM$ END INTERVAL
|
||||
call timer_stop(1)
|
||||
tm = timer_read(1)
|
||||
|
||||
nit=0
|
||||
if (m.eq.24) then
|
||||
if((abs((sx- (-3.247834652034740D3))/sx).le.epsilon).and.
|
||||
> (abs((sy- (-6.958407078382297D3))/sy).le.epsilon))
|
||||
> verified = .TRUE.
|
||||
elseif (m.eq.25) then
|
||||
if ((abs((sx- (-2.863319731645753D+03))/sx).le.epsilon).and.
|
||||
> (abs((sy- (-6.320053679109499D+03))/sy).le.epsilon))
|
||||
> verified = .TRUE.
|
||||
elseif (m.eq.28) then
|
||||
if ((abs((sx- (-4.295875165629892D3))/sx).le.epsilon).and.
|
||||
> (abs((sy- (-1.580732573678431D4))/sy).le.epsilon))
|
||||
> verified = .TRUE.
|
||||
elseif (m.eq.30) then
|
||||
if ((abs((sx- (4.033815542441498D4))/sx).le.epsilon).and.
|
||||
> (abs((sy- (-2.660669192809235D4))/sy).le.epsilon))
|
||||
> verified = .true.
|
||||
elseif (m.eq.32) then
|
||||
if ((abs((sx- (4.764367927995374D+4))/sx).le.epsilon).and.
|
||||
> (abs((sy- (-8.084072988043731D+4))/sy).le.epsilon))
|
||||
> verified = .true.
|
||||
elseif (m.eq.36) then
|
||||
if ((abs((sx- (1.982481200946593D+5))/sx).le.epsilon).and.
|
||||
> (abs((sy- (-1.020596636361769D+5))/sy).le.epsilon))
|
||||
> verified = .true.
|
||||
elseif (m.eq.40) then
|
||||
if ((abs((sx- (-5.319717441530D+05))/sx).le.epsilon).and.
|
||||
> (abs((sy- (-3.688834557731D+05))/sy).le.epsilon))
|
||||
> verified = .true.
|
||||
else
|
||||
verified = .false.
|
||||
endif
|
||||
Mops = 2.d0**(m+1)/tm/1000000.d0
|
||||
|
||||
write (6,11) tm, m, gc, sx, sy, (i, q(i), i = 0, nq - 1)
|
||||
11 format (' EP Benchmark Results:'//' CPU Time =',f10.4/' N = 2^',
|
||||
> i5/' No. Gaussian Pairs =',f15.0/' Sums = ',1p,2d25.15/
|
||||
> ' Counts:'/(i3,0p,f15.0))
|
||||
|
||||
call print_results('EP', class, m+1, 0, 0, nit,
|
||||
> tm, Mops,
|
||||
> 'Random numbers generated',
|
||||
> verified, npbversion)
|
||||
|
||||
|
||||
if (timers_enabled) then
|
||||
print *, 'Total time: ', timer_read(1)
|
||||
print *, 'Gaussian pairs: ', timer_read(2)
|
||||
print *, 'Random numbers: ', timer_read(3)
|
||||
endif
|
||||
|
||||
|
||||
end
|
||||
|
||||
subroutine print_results(name, class, n1, n2, n3, niter,
|
||||
> t, mops, optype, verified, npbversion)
|
||||
|
||||
implicit none
|
||||
character*2 name
|
||||
character*1 class
|
||||
integer n1, n2, n3, niter, j
|
||||
double precision t, mops
|
||||
character optype*24, size*13
|
||||
logical verified
|
||||
character*(*) npbversion
|
||||
|
||||
write (*, 2) name
|
||||
2 format(//, ' ', A2, ' Benchmark Completed.')
|
||||
|
||||
write (*, 3) Class
|
||||
3 format(' Class = ', 12x, a12)
|
||||
|
||||
c If this is not a grid-based problem (EP, FT, CG), then
|
||||
c we only print n1, which contains some measure of the
|
||||
c problem size. In that case, n2 and n3 are both zero.
|
||||
c Otherwise, we print the grid size n1xn2xn3
|
||||
|
||||
if ((n2 .eq. 0) .and. (n3 .eq. 0)) then
|
||||
if (name(1:2) .eq. 'EP') then
|
||||
write(size, '(f12.0)' ) 2.d0**n1
|
||||
do j =13,1,-1
|
||||
if (size(j:j) .eq. '.') size(j:j) = ' '
|
||||
end do
|
||||
write (*,42) size
|
||||
42 format(' Size = ',12x, a14)
|
||||
else
|
||||
write (*,44) n1
|
||||
44 format(' Size = ',12x, i12)
|
||||
endif
|
||||
else
|
||||
write (*, 4) n1,n2,n3
|
||||
4 format(' Size = ',12x, i3,'x',i3,'x',i3)
|
||||
endif
|
||||
|
||||
write (*, 5) niter
|
||||
5 format(' Iterations = ', 12x, i12)
|
||||
|
||||
write (*, 6) t
|
||||
6 format(' Time in seconds = ',12x, f12.2)
|
||||
|
||||
write (*,9) mops
|
||||
9 format(' Mop/s total = ',12x, f12.2)
|
||||
|
||||
write(*, 11) optype
|
||||
11 format(' Operation type = ', a24)
|
||||
|
||||
if (verified) then
|
||||
write(*,12) ' SUCCESSFUL'
|
||||
else
|
||||
write(*,12) 'UNSUCCESSFUL'
|
||||
endif
|
||||
12 format(' Verification = ', 12x, a)
|
||||
|
||||
write(*,13) npbversion
|
||||
13 format(' Version = ', 12x, a12)
|
||||
|
||||
write (*,130)
|
||||
130 format(//' Please send the results of this run to:'//
|
||||
> ' NPB Development Team '/
|
||||
> ' Internet: npb@nas.nasa.gov'/
|
||||
> ' '/
|
||||
> ' If email is not available, send this to:'//
|
||||
> ' MS T27A-1'/
|
||||
> ' NASA Ames Research Center'/
|
||||
> ' Moffett Field, CA 94035-1000'//
|
||||
> ' Fax: 415-604-3957'//)
|
||||
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
pure subroutine randlc (x, a, ret)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
implicit none
|
||||
intent(in)::a
|
||||
intent(inout)::x
|
||||
intent(out)::ret
|
||||
double precision r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z
|
||||
> ,ret
|
||||
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
|
||||
> t46 = t23 ** 2)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Break A into two parts such that A = 2^23 * A1 + A2.
|
||||
c---------------------------------------------------------------------
|
||||
t1 = r23 * a
|
||||
a1 = int (t1)
|
||||
a2 = a - t23 * a1
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Break X into two parts such that X = 2^23 * X1 + X2, compute
|
||||
c Z = A1 * X2 + A2 * X1 (mod 2^23), and then
|
||||
c X = 2^23 * Z + A2 * X2 (mod 2^46).
|
||||
c---------------------------------------------------------------------
|
||||
t1 = r23 * x
|
||||
x1 = int (t1)
|
||||
x2 = x - t23 * x1
|
||||
t1 = a1 * x2 + a2 * x1
|
||||
t2 = int (r23 * t1)
|
||||
z = t1 - t23 * t2
|
||||
t3 = t23 * z + a2 * x2
|
||||
t4 = int (r46 * t3)
|
||||
x = t3 - t46 * t4
|
||||
ret = r46 * x
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
pure subroutine randNext (x, ret, a1, a2)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
implicit none
|
||||
intent(inout)::x
|
||||
intent(in)::a1, a2
|
||||
intent(out)::ret
|
||||
double precision r23,r46,t23,t46,x,t1,t2,t3,t4,a1,a2,x1,x2,z
|
||||
> ,ret
|
||||
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
|
||||
> t46 = t23 ** 2)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Break X into two parts such that X = 2^23 * X1 + X2, compute
|
||||
c Z = A1 * X2 + A2 * X1 (mod 2^23), and then
|
||||
c X = 2^23 * Z + A2 * X2 (mod 2^46).
|
||||
c---------------------------------------------------------------------
|
||||
t1 = r23 * x
|
||||
x1 = int (t1)
|
||||
x2 = x - t23 * x1
|
||||
t1 = a1 * x2 + a2 * x1
|
||||
t2 = int (r23 * t1)
|
||||
z = t1 - t23 * t2
|
||||
t3 = t23 * z + a2 * x2
|
||||
t4 = int (r46 * t3)
|
||||
x = t3 - t46 * t4
|
||||
ret = r46 * x
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine vranlc (n, x, a, y)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
|
||||
integer i,n
|
||||
double precision y,r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z
|
||||
dimension y(*)
|
||||
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
|
||||
> t46 = t23 ** 2)
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Break A into two parts such that A = 2^23 * A1 + A2.
|
||||
c---------------------------------------------------------------------
|
||||
t1 = r23 * a
|
||||
a1 = int (t1)
|
||||
a2 = a - t23 * a1
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Generate N results. This loop is not vectorizable.
|
||||
c---------------------------------------------------------------------
|
||||
do i = 1, n
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Break X into two parts such that X = 2^23 * X1 + X2, compute
|
||||
c Z = A1 * X2 + A2 * X1 (mod 2^23), and then
|
||||
c X = 2^23 * Z + A2 * X2 (mod 2^46).
|
||||
c---------------------------------------------------------------------
|
||||
t1 = r23 * x
|
||||
x1 = int (t1)
|
||||
x2 = x - t23 * x1
|
||||
t1 = a1 * x2 + a2 * x1
|
||||
t2 = int (r23 * t1)
|
||||
z = t1 - t23 * t2
|
||||
t3 = t23 * z + a2 * x2
|
||||
t4 = int (r46 * t3)
|
||||
x = t3 - t46 * t4
|
||||
y(i) = r46 * x
|
||||
enddo
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine timer_clear(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
integer n
|
||||
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
|
||||
elapsed(n) = 0.0
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine timer_start(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
external elapsed_time
|
||||
double precision elapsed_time
|
||||
integer n
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
!DVM$ BARRIER
|
||||
start(n) = elapsed_time()
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine timer_stop(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
external elapsed_time
|
||||
double precision elapsed_time
|
||||
integer n
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
double precision t, now
|
||||
now = elapsed_time()
|
||||
t = now - start(n)
|
||||
elapsed(n) = elapsed(n) + t
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
double precision function timer_read(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
integer n
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
|
||||
timer_read = elapsed(n)
|
||||
return
|
||||
end
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
double precision function elapsed_time()
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
implicit none
|
||||
double precision t
|
||||
double precision dvtime
|
||||
data t/0.d0/
|
||||
elapsed_time = dvtime()
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,12 @@
|
||||
@echo off
|
||||
|
||||
set CLASS=%1
|
||||
set OPT=%2
|
||||
|
||||
CALL ..\sys\setparams EP %CLASS%
|
||||
CALL %F77% %OPT% ep 1>out_%CLASS%.txt 2>err_%CLASS%.txt
|
||||
if exist ep.exe (
|
||||
copy ep.exe %BIN%\ep.%CLASS%.x.exe
|
||||
del ep.exe
|
||||
)
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
SHELL=/bin/sh
|
||||
BENCHMARK=ft
|
||||
BENCHMARKU=FT
|
||||
|
||||
include ../config/make.def
|
||||
include ../sys/make.common
|
||||
|
||||
SOURCES = ft.fdv
|
||||
|
||||
OBJS = ${SOURCES:.fdv=.o}
|
||||
|
||||
${PROGRAM}: config $(OBJS)
|
||||
${FLINK} -o ${PROGRAM} ${OBJS}
|
||||
|
||||
%.o: %.fdv npbparams.h global.h
|
||||
${F77} ${FFLAGS} -f90 -c -o $@ $<
|
||||
|
||||
clean:
|
||||
rm -f npbparams.h
|
||||
rm -f *.o *~
|
||||
rm -f *.cu *.cuf *.c *.f
|
||||
@@ -0,0 +1,3 @@
|
||||
integer dvm_debug
|
||||
parameter (dvm_debug=0)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,80 @@
|
||||
include 'npbparams.h'
|
||||
|
||||
|
||||
! If processor array is 1x1 -> 0D grid decomposition
|
||||
|
||||
|
||||
! Cache blocking params. These values are good for most
|
||||
! RISC processors.
|
||||
! FFT parameters:
|
||||
! fftblock controls how many ffts are done at a time.
|
||||
! The default is appropriate for most cache-based machines
|
||||
! On vector machines, the FFT can be vectorized with vector
|
||||
! length equal to the block size, so the block size should
|
||||
! be as large as possible. This is the size of the smallest
|
||||
! dimension of the problem: 128 for class A, 256 for class B and
|
||||
! 512 for class C.
|
||||
|
||||
|
||||
! we need a bunch of logic to keep track of how
|
||||
! arrays are laid out.
|
||||
|
||||
|
||||
! Note: this serial version is the derived from the parallel 0D case
|
||||
! of the ft NPB.
|
||||
! The computation proceeds logically as
|
||||
|
||||
! set up initial conditions
|
||||
! fftx(1)
|
||||
! transpose (1->2)
|
||||
! ffty(2)
|
||||
! transpose (2->3)
|
||||
! fftz(3)
|
||||
! time evolution
|
||||
! fftz(3)
|
||||
! transpose (3->2)
|
||||
! ffty(2)
|
||||
! transpose (2->1)
|
||||
! fftx(1)
|
||||
! compute residual(1)
|
||||
|
||||
! for the 0D, 1D, 2D strategies, the layouts look like xxx
|
||||
!
|
||||
! 0D 1D 2D
|
||||
! 1: xyz xyz xyz
|
||||
|
||||
|
||||
|
||||
integer T_total, T_setup, T_fft, T_evolve, T_checksum, T_fftx, T_ffty, T_fftz, T_max
|
||||
parameter (T_total = 1, T_setup = 2, T_fft = 3, T_evolve = 4, T_checksum = 5, T_max = 5)
|
||||
|
||||
logical timers_enabled
|
||||
parameter (timers_enabled = .FALSE.)
|
||||
logical more_memory
|
||||
parameter (more_memory = .FALSE.)
|
||||
|
||||
external timer_read
|
||||
double precision timer_read
|
||||
external ilog2
|
||||
integer ilog2
|
||||
|
||||
external randlc
|
||||
double precision randlc
|
||||
|
||||
|
||||
|
||||
double precision seed, a, pi, alpha
|
||||
parameter (seed = 314159265.d0, a = 1220703125.d0, pi = 3.141592653589793238d0, alpha=1.0d-6)
|
||||
|
||||
double complex u0(nxp,ny,nz), u1(nxp,ny,nz)
|
||||
double precision twiddle(nxp,ny,nz)
|
||||
double complex u(nxp)
|
||||
double complex y1(maxdim),y2(maxdim)
|
||||
common /arrays/ u,u0,u1,twiddle,y1,y2
|
||||
|
||||
!dvm$ distribute (*,*,*) :: twiddle
|
||||
!dvm$ distribute (*,*,*) :: u0
|
||||
!dvm$ distribute (*,*,*) :: u1
|
||||
!dvm$ distribute (*) :: u
|
||||
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
@echo off
|
||||
|
||||
set CLASS=%1
|
||||
set OPT=%2
|
||||
|
||||
CALL ..\sys\setparams FT %CLASS%
|
||||
CALL %F77% %OPT% ft 1>out_%CLASS%.txt 2>err_%CLASS%.txt
|
||||
if exist ft.exe (
|
||||
copy ft.exe %BIN%\ft.%CLASS%.x.exe
|
||||
del ft.exe
|
||||
)
|
||||
|
||||
@@ -0,0 +1,44 @@
|
||||
SHELL=/bin/sh
|
||||
BENCHMARK=lu
|
||||
BENCHMARKU=LU
|
||||
|
||||
include ../config/make.def
|
||||
include ../sys/make.common
|
||||
|
||||
OBJS = lu.o read_input.o \
|
||||
domain.o setcoeff.o setbv.o exact.o setiv.o \
|
||||
erhs.o ssor.o rhs.o l2norm.o error.o \
|
||||
pintgr.o verify.o print_results.o timers.o
|
||||
|
||||
|
||||
|
||||
${PROGRAM}: config
|
||||
${MAKE} exec
|
||||
|
||||
exec: $(OBJS)
|
||||
${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB}
|
||||
|
||||
.f.o :
|
||||
${F77} ${FFLAGS} -c -o $@ $<
|
||||
|
||||
lu.o: lu.f applu.incl npbparams.h
|
||||
erhs.o: erhs.f applu.incl npbparams.h
|
||||
error.o: error.f applu.incl npbparams.h
|
||||
exact.o: exact.f applu.incl npbparams.h
|
||||
l2norm.o: l2norm.f
|
||||
pintgr.o: pintgr.f applu.incl npbparams.h
|
||||
read_input.o: read_input.f applu.incl npbparams.h
|
||||
rhs.o: rhs.f applu.incl npbparams.h
|
||||
setbv.o: setbv.f applu.incl npbparams.h
|
||||
setiv.o: setiv.f applu.incl npbparams.h
|
||||
setcoeff.o: setcoeff.f applu.incl npbparams.h
|
||||
ssor.o: ssor.f applu.incl npbparams.h
|
||||
domain.o: domain.f applu.incl npbparams.h
|
||||
verify.o: verify.f applu.incl npbparams.h
|
||||
print_results.o: print_results.f
|
||||
timers.o: timers.f
|
||||
|
||||
clean:
|
||||
- /bin/rm -f npbparams.h
|
||||
- /bin/rm -f *.o *DVMH* *~
|
||||
- /bin/rm -f *.cu *.cuf
|
||||
@@ -0,0 +1,185 @@
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
c--- applu.incl
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c npbparams.h defines parameters that depend on the class and
|
||||
c number of nodes
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'npbparams.h'
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c parameters which can be overridden in runtime config file
|
||||
c isiz1,isiz2,isiz3 give the maximum size
|
||||
c ipr = 1 to print out verbose information
|
||||
c omega = 2.0 is correct for all classes
|
||||
c tolrsd is tolerance levels for steady state residuals
|
||||
c---------------------------------------------------------------------
|
||||
integer ipr_default,iS,jS,kS
|
||||
parameter (ipr_default = 1)
|
||||
double precision omega_default
|
||||
parameter (omega_default = 1.2d0)
|
||||
double precision tolrsd1_def, tolrsd2_def, tolrsd3_def,
|
||||
> tolrsd4_def, tolrsd5_def
|
||||
parameter (tolrsd1_def=1.0e-08,
|
||||
> tolrsd2_def=1.0e-08, tolrsd3_def=1.0e-08,
|
||||
> tolrsd4_def=1.0e-08, tolrsd5_def=1.0e-08)
|
||||
|
||||
double precision c1, c2, c3, c4, c5
|
||||
parameter( c1 = 1.40d+00, c2 = 0.40d+00,
|
||||
> c3 = 1.00d-01, c4 = 1.00d+00,
|
||||
> c5 = 1.40d+00,
|
||||
> iS =isiz1/2*2+1,jS= isiz2/2*2+1, kS=isiz3 )
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c grid
|
||||
c---------------------------------------------------------------------
|
||||
integer nx, ny, nz
|
||||
integer nx0, ny0, nz0
|
||||
integer ist, iend
|
||||
integer jst, jend
|
||||
integer ii1, ii2
|
||||
integer ji1, ji2
|
||||
integer ki1, ki2
|
||||
double precision dxi, deta, dzeta
|
||||
double precision tx1, tx2, tx3
|
||||
double precision ty1, ty2, ty3
|
||||
double precision tz1, tz2, tz3
|
||||
|
||||
common/cgcon/ dxi, deta, dzeta,
|
||||
> tx1, tx2, tx3,
|
||||
> ty1, ty2, ty3,
|
||||
> tz1, tz2, tz3,
|
||||
> nx, ny, nz,
|
||||
> nx0, ny0, nz0,
|
||||
> ist, iend,
|
||||
> jst, jend,
|
||||
> ii1, ii2,
|
||||
> ji1, ji2,
|
||||
> ki1, ki2
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c dissipation
|
||||
c---------------------------------------------------------------------
|
||||
double precision dx1, dx2, dx3, dx4, dx5
|
||||
double precision dy1, dy2, dy3, dy4, dy5
|
||||
double precision dz1, dz2, dz3, dz4, dz5
|
||||
double precision dssp
|
||||
|
||||
common/disp/ dx1,dx2,dx3,dx4,dx5,
|
||||
> dy1,dy2,dy3,dy4,dy5,
|
||||
> dz1,dz2,dz3,dz4,dz5,
|
||||
> dssp
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c field variables and residuals
|
||||
c to improve cache performance, second two dimensions padded by 1
|
||||
c for even number sizes only.
|
||||
c note: corresponding array (called "v") in routines blts, buts,
|
||||
c and l2norm are similarly padded
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ ALIGN frct(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX2,iEX3,iE
|
||||
!DVM$&X4)
|
||||
!DVM$ ALIGN qs(iEX1,iEX2,iEX3) WITH dvmh_temp0(*,iEX1,iEX2,iEX3)
|
||||
!DVM$ ALIGN rho_i(iEX1,iEX2,iEX3) WITH dvmh_temp0(*,iEX1,iEX2,iEX3)
|
||||
!DVM$ ALIGN rsd(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX2,iEX3,iEX
|
||||
!DVM$&4)
|
||||
!DVM$ ALIGN u(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX2,iEX3,iEX4)
|
||||
!DVM$ DYNAMIC u,rsd,frct,qs,rho_i
|
||||
!DVM$ SHADOW qs(1:1,1:1,1:1)
|
||||
!DVM$ SHADOW rho_i(1:1,1:1,1:1)
|
||||
!DVM$ SHADOW rsd(0:0,2:2,2:2,2:2)
|
||||
!DVM$ SHADOW frct(0:0,2:2,2:2,2:2)
|
||||
!DVM$ SHADOW u(0:0,2:2,2:2,2:2)
|
||||
!DVM$ TEMPLATE, COMMON :: dvmh_temp0(1:6,0:iS+1,0:jS+1,0:kS+1)
|
||||
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r1(1:6,0:iS+1,0:jS+1,0:kS+1)
|
||||
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r2(1:6,0:iS+1,0:jS+1,0:kS+1)
|
||||
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r3(1:6,0:iS+1,0:jS+1,0:kS+1)
|
||||
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r0(1:6,0:iS+1,0:jS+1,0:kS+1)
|
||||
!DVM$ DISTRIBUTE dvmh_temp0(*,BLOCK,BLOCK,BLOCK)
|
||||
!DVM$ DISTRIBUTE dvmh_temp0_r1(*,BLOCK,BLOCK,*)
|
||||
!DVM$ DISTRIBUTE dvmh_temp0_r2(*,BLOCK,*,BLOCK)
|
||||
!DVM$ DISTRIBUTE dvmh_temp0_r3(*,*,BLOCK,BLOCK)
|
||||
!DVM$ DISTRIBUTE dvmh_temp0_r0(*,*,*,BLOCK)
|
||||
|
||||
!DVM$ DYNAMIC dvmh_temp0, dvmh_temp0_r1, dvmh_temp0_r2, dvmh_temp0_r3,
|
||||
!DVM$&dvmh_temp0_r0
|
||||
double precision u(5,isiz1/2*2+1,
|
||||
> isiz2/2*2+1,
|
||||
> isiz3),
|
||||
> rsd(5,isiz1/2*2+1,
|
||||
> isiz2/2*2+1,
|
||||
> isiz3),
|
||||
> frct(5,isiz1/2*2+1,
|
||||
> isiz2/2*2+1,
|
||||
> isiz3),
|
||||
> flux(5,isiz1),
|
||||
> qs(isiz1,isiz2,isiz3),
|
||||
> rho_i(isiz1/2*2+1,isiz2/2*2+1,isiz3)
|
||||
|
||||
common/cvar/ u, rsd, frct, flux,
|
||||
> qs, rho_i
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c output control parameters
|
||||
c---------------------------------------------------------------------
|
||||
integer ipr, inorm
|
||||
|
||||
common/cprcon/ ipr, inorm
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c newton-raphson iteration control parameters
|
||||
c---------------------------------------------------------------------
|
||||
integer itmax, invert
|
||||
double precision dt, omega, tolrsd(5),
|
||||
> rsdnm(5), errnm(5), frc, ttotal
|
||||
|
||||
common/ctscon/ dt, omega, tolrsd,
|
||||
> rsdnm, errnm, frc, ttotal,
|
||||
> itmax, invert
|
||||
|
||||
double precision a(5,5,isiz1/2*2+1,isiz2),
|
||||
> b(5,5,isiz1/2*2+1,isiz2),
|
||||
> c(5,5,isiz1/2*2+1,isiz2),
|
||||
> d(5,5,isiz1/2*2+1,isiz2)
|
||||
|
||||
common/cjac/ a, b, c, d
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c coefficients of the exact solution
|
||||
c---------------------------------------------------------------------
|
||||
double precision ce(5,13)
|
||||
|
||||
common/cexact/ ce
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c timers
|
||||
c---------------------------------------------------------------------
|
||||
integer t_rhsx,t_rhsy,t_rhsz,t_rhs,t_jacld,t_blts,
|
||||
> t_jacu,t_buts,t_add,t_l2norm,t_last,t_total
|
||||
parameter (t_total = 1)
|
||||
parameter (t_rhsx = 2)
|
||||
parameter (t_rhsy = 3)
|
||||
parameter (t_rhsz = 4)
|
||||
parameter (t_rhs = 5)
|
||||
parameter (t_jacld = 6)
|
||||
parameter (t_blts = 7)
|
||||
parameter (t_jacu = 8)
|
||||
parameter (t_buts = 9)
|
||||
parameter (t_add = 10)
|
||||
parameter (t_l2norm = 11)
|
||||
parameter (t_last = 11)
|
||||
logical timeron
|
||||
double precision maxtime
|
||||
|
||||
common/timer/maxtime,timeron
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c end of include file
|
||||
c---------------------------------------------------------------------
|
||||
@@ -0,0 +1,79 @@
|
||||
|
||||
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine domain ()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! local variables
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
include 'applu.incl'
|
||||
!---------------------------------------------------------------------
|
||||
nx = nx0
|
||||
ny = ny0
|
||||
nz = nz0
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! check the sub-domain size
|
||||
!---------------------------------------------------------------------
|
||||
if (nx .lt. 4 .or. ny .lt. 4 .or. nz .lt. 4) then
|
||||
write (unit = *,fmt = 2001) nx,ny,nz
|
||||
2001 format (5x,'SUBDOMAIN SIZE IS TOO SMALL - ', /5x,'
|
||||
&ADJUST PROBLEM SIZE OR NUMBER OF PROCESSORS', /5x,'SO THAT
|
||||
&NX, NY AND NZ ARE GREATER THAN OR EQUAL', /5x,'TO 4 THEY AR
|
||||
&E CURRENTLY', 3I3)
|
||||
stop
|
||||
endif
|
||||
if (nx .gt. isiz1 .or. ny .gt. isiz2 .or. nz .gt. isiz3) then
|
||||
write (unit = *,fmt = 2002) nx,ny,nz
|
||||
2002 format (5x,'SUBDOMAIN SIZE IS TOO LARGE - ', /5x,'
|
||||
&ADJUST PROBLEM SIZE OR NUMBER OF PROCESSORS', /5x,'SO THAT
|
||||
&NX, NY AND NZ ARE LESS THAN OR EQUAL TO ', /5x,'ISIZ1, ISIZ
|
||||
&2 AND ISIZ3 RESPECTIVELY. THEY ARE', /5x,'CURRENTLY', 3I4)
|
||||
stop
|
||||
endif
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! set up the start and end in i and j extents for all processors
|
||||
!---------------------------------------------------------------------
|
||||
ist = 2
|
||||
iend = nx - 1
|
||||
jst = 2
|
||||
jend = ny - 1
|
||||
ii1 = 2
|
||||
ii2 = nx0 - 1
|
||||
ji1 = 2
|
||||
ji2 = ny0 - 2
|
||||
ki1 = 3
|
||||
ki2 = nz0 - 1
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,369 @@
|
||||
|
||||
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine erhs ()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
! compute the right hand side based on exact solution
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! local variables
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
include 'applu.incl'
|
||||
!---------------------------------------------------------------------
|
||||
integer i,j,k,m
|
||||
double precision xi,eta,zeta
|
||||
double precision q
|
||||
double precision u21,u31,u41
|
||||
double precision tmp
|
||||
double precision u21i,u31i,u41i,u51i
|
||||
double precision u21j,u31j,u41j,u51j
|
||||
double precision u21k,u31k,u41k,u51k
|
||||
double precision u21im1,u31im1,u41im1,u51im1
|
||||
double precision u21jm1,u31jm1,u41jm1,u51jm1
|
||||
double precision u21km1,u31km1,u41km1,u51km1
|
||||
!DVM$ PARALLEL (k,j,i,m) ON frct(m,i,j,k), PRIVATE (m,i,j,k)
|
||||
do k = 1,nz
|
||||
do j = 1,ny
|
||||
do i = 1,nx
|
||||
do m = 1,5
|
||||
frct(m,i,j,k) = 0.0d+00
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,j,i) ON rsd(*,i,j,k), PRIVATE (m,i,j,k,xi,zeta,eta)
|
||||
do k = 1,nz
|
||||
do j = 1,ny
|
||||
do i = 1,nx
|
||||
zeta = dble (k - 1) / (nz - 1)
|
||||
eta = dble (j - 1) / (ny0 - 1)
|
||||
xi = dble (i - 1) / (nx0 - 1)
|
||||
do m = 1,5
|
||||
rsd(m,i,j,k) = ce(m,1) + (ce(m,2) + (ce(m,5) + (ce(m,8
|
||||
&) + ce(m,11) * xi) * xi) * xi) * xi + (ce(m,3) + (ce(m,6) + (ce(m,
|
||||
&9) + ce(m,12) * eta) * eta) * eta) * eta + (ce(m,4) + (ce(m,7) + (
|
||||
&ce(m,10) + ce(m,13) * zeta) * zeta) * zeta) * zeta
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! xi-direction flux differences
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r3(iEX0,iEX1,iEX2,iE
|
||||
!DVM$&X3) :: rsd
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r3(iEX0,iEX1,iEX2,iE
|
||||
!DVM$&X3) :: frct
|
||||
!DVM$ PARALLEL (k,j) ON frct(*,*,j,k), PRIVATE (m,i,j,q,tmp,k,flux,u31i,
|
||||
!DVM$&u41i,u51i,u21i,u21,u31im1,u41im1,u21im1,u51im1)
|
||||
do k = 2,nz - 1
|
||||
do j = jst,jend
|
||||
do i = 1,nx
|
||||
flux(1,i) = rsd(2,i,j,k)
|
||||
u21 = rsd(2,i,j,k) / rsd(1,i,j,k)
|
||||
q = 0.50d+00 * (rsd(2,i,j,k) * rsd(2,i,j,k) + rsd(3,i,j,k
|
||||
&) * rsd(3,i,j,k) + rsd(4,i,j,k) * rsd(4,i,j,k)) / rsd(1,i,j,k)
|
||||
flux(2,i) = rsd(2,i,j,k) * u21 + c2 * (rsd(5,i,j,k) - q)
|
||||
flux(3,i) = rsd(3,i,j,k) * u21
|
||||
flux(4,i) = rsd(4,i,j,k) * u21
|
||||
flux(5,i) = (c1 * rsd(5,i,j,k) - c2 * q) * u21
|
||||
enddo
|
||||
do i = ist,iend
|
||||
do m = 1,5
|
||||
frct(m,i,j,k) = frct(m,i,j,k) - tx2 * (flux(m,i + 1) -
|
||||
& flux(m,i - 1))
|
||||
enddo
|
||||
enddo
|
||||
do i = ist,nx
|
||||
tmp = 1.0d+00 / rsd(1,i,j,k)
|
||||
u21i = tmp * rsd(2,i,j,k)
|
||||
u31i = tmp * rsd(3,i,j,k)
|
||||
u41i = tmp * rsd(4,i,j,k)
|
||||
u51i = tmp * rsd(5,i,j,k)
|
||||
tmp = 1.0d+00 / rsd(1,i - 1,j,k)
|
||||
u21im1 = tmp * rsd(2,i - 1,j,k)
|
||||
u31im1 = tmp * rsd(3,i - 1,j,k)
|
||||
u41im1 = tmp * rsd(4,i - 1,j,k)
|
||||
u51im1 = tmp * rsd(5,i - 1,j,k)
|
||||
flux(2,i) = 4.0d+00 / 3.0d+00 * tx3 * (u21i - u21im1)
|
||||
flux(3,i) = tx3 * (u31i - u31im1)
|
||||
flux(4,i) = tx3 * (u41i - u41im1)
|
||||
flux(5,i) = 0.50d+00 * (1.0d+00 - c1 * c5) * tx3 * (u21i*
|
||||
&* 2 + u31i** 2 + u41i** 2 - (u21im1** 2 + u31im1** 2 + u41im1** 2)
|
||||
&) + 1.0d+00 / 6.0d+00 * tx3 * (u21i** 2 - u21im1** 2) + c1 * c5 *
|
||||
&tx3 * (u51i - u51im1)
|
||||
enddo
|
||||
do i = ist,iend
|
||||
frct(1,i,j,k) = frct(1,i,j,k) + dx1 * tx1 * (rsd(1,i - 1,
|
||||
&j,k) - 2.0d+00 * rsd(1,i,j,k) + rsd(1,i + 1,j,k))
|
||||
frct(2,i,j,k) = frct(2,i,j,k) + tx3 * c3 * c4 * (flux(2,i
|
||||
& + 1) - flux(2,i)) + dx2 * tx1 * (rsd(2,i - 1,j,k) - 2.0d+00 * rsd
|
||||
&(2,i,j,k) + rsd(2,i + 1,j,k))
|
||||
frct(3,i,j,k) = frct(3,i,j,k) + tx3 * c3 * c4 * (flux(3,i
|
||||
& + 1) - flux(3,i)) + dx3 * tx1 * (rsd(3,i - 1,j,k) - 2.0d+00 * rsd
|
||||
&(3,i,j,k) + rsd(3,i + 1,j,k))
|
||||
frct(4,i,j,k) = frct(4,i,j,k) + tx3 * c3 * c4 * (flux(4,i
|
||||
& + 1) - flux(4,i)) + dx4 * tx1 * (rsd(4,i - 1,j,k) - 2.0d+00 * rsd
|
||||
&(4,i,j,k) + rsd(4,i + 1,j,k))
|
||||
frct(5,i,j,k) = frct(5,i,j,k) + tx3 * c3 * c4 * (flux(5,i
|
||||
& + 1) - flux(5,i)) + dx5 * tx1 * (rsd(5,i - 1,j,k) - 2.0d+00 * rsd
|
||||
&(5,i,j,k) + rsd(5,i + 1,j,k))
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Fourth-order dissipation
|
||||
!---------------------------------------------------------------------
|
||||
do m = 1,5
|
||||
frct(m,2,j,k) = frct(m,2,j,k) - dssp * ((+(5.0d+00)) * rs
|
||||
&d(m,2,j,k) - 4.0d+00 * rsd(m,3,j,k) + rsd(m,4,j,k))
|
||||
frct(m,3,j,k) = frct(m,3,j,k) - dssp * ((-(4.0d+00)) * rs
|
||||
&d(m,2,j,k) + 6.0d+00 * rsd(m,3,j,k) - 4.0d+00 * rsd(m,4,j,k) + rsd
|
||||
&(m,5,j,k))
|
||||
enddo
|
||||
do i = 4,nx - 3
|
||||
do m = 1,5
|
||||
frct(m,i,j,k) = frct(m,i,j,k) - dssp * (rsd(m,i - 2,j,
|
||||
&k) - 4.0d+00 * rsd(m,i - 1,j,k) + 6.0d+00 * rsd(m,i,j,k) - 4.0d+00
|
||||
& * rsd(m,i + 1,j,k) + rsd(m,i + 2,j,k))
|
||||
enddo
|
||||
enddo
|
||||
do m = 1,5
|
||||
frct(m,nx - 2,j,k) = frct(m,nx - 2,j,k) - dssp * (rsd(m,n
|
||||
&x - 4,j,k) - 4.0d+00 * rsd(m,nx - 3,j,k) + 6.0d+00 * rsd(m,nx - 2,
|
||||
&j,k) - 4.0d+00 * rsd(m,nx - 1,j,k))
|
||||
frct(m,nx - 1,j,k) = frct(m,nx - 1,j,k) - dssp * (rsd(m,n
|
||||
&x - 3,j,k) - 4.0d+00 * rsd(m,nx - 2,j,k) + 5.0d+00 * rsd(m,nx - 1,
|
||||
&j,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! eta-direction flux differences
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
|
||||
!DVM$& :: rsd
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
|
||||
!DVM$& :: frct
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r2(iEX0,iEX1,iEX2,iE
|
||||
!DVM$&X3) :: rsd
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r2(iEX0,iEX1,iEX2,iE
|
||||
!DVM$&X3) :: frct
|
||||
!DVM$ PARALLEL (k,i) ON frct(*,i,*,k), PRIVATE (m,i,u31,j,q,tmp,u31j,u41
|
||||
!DVM$&j,u41jm1,u51jm1,u21j,u31jm1,k,u21jm1,u51j,flux)
|
||||
do k = 2,nz - 1
|
||||
do i = ist,iend
|
||||
do j = 1,ny
|
||||
flux(1,j) = rsd(3,i,j,k)
|
||||
u31 = rsd(3,i,j,k) / rsd(1,i,j,k)
|
||||
q = 0.50d+00 * (rsd(2,i,j,k) * rsd(2,i,j,k) + rsd(3,i,j,k
|
||||
&) * rsd(3,i,j,k) + rsd(4,i,j,k) * rsd(4,i,j,k)) / rsd(1,i,j,k)
|
||||
flux(2,j) = rsd(2,i,j,k) * u31
|
||||
flux(3,j) = rsd(3,i,j,k) * u31 + c2 * (rsd(5,i,j,k) - q)
|
||||
flux(4,j) = rsd(4,i,j,k) * u31
|
||||
flux(5,j) = (c1 * rsd(5,i,j,k) - c2 * q) * u31
|
||||
enddo
|
||||
do j = jst,jend
|
||||
do m = 1,5
|
||||
frct(m,i,j,k) = frct(m,i,j,k) - ty2 * (flux(m,j + 1) -
|
||||
& flux(m,j - 1))
|
||||
enddo
|
||||
enddo
|
||||
do j = jst,ny
|
||||
tmp = 1.0d+00 / rsd(1,i,j,k)
|
||||
u21j = tmp * rsd(2,i,j,k)
|
||||
u31j = tmp * rsd(3,i,j,k)
|
||||
u41j = tmp * rsd(4,i,j,k)
|
||||
u51j = tmp * rsd(5,i,j,k)
|
||||
tmp = 1.0d+00 / rsd(1,i,j - 1,k)
|
||||
u21jm1 = tmp * rsd(2,i,j - 1,k)
|
||||
u31jm1 = tmp * rsd(3,i,j - 1,k)
|
||||
u41jm1 = tmp * rsd(4,i,j - 1,k)
|
||||
u51jm1 = tmp * rsd(5,i,j - 1,k)
|
||||
flux(2,j) = ty3 * (u21j - u21jm1)
|
||||
flux(3,j) = 4.0d+00 / 3.0d+00 * ty3 * (u31j - u31jm1)
|
||||
flux(4,j) = ty3 * (u41j - u41jm1)
|
||||
flux(5,j) = 0.50d+00 * (1.0d+00 - c1 * c5) * ty3 * (u21j*
|
||||
&* 2 + u31j** 2 + u41j** 2 - (u21jm1** 2 + u31jm1** 2 + u41jm1** 2)
|
||||
&) + 1.0d+00 / 6.0d+00 * ty3 * (u31j** 2 - u31jm1** 2) + c1 * c5 *
|
||||
&ty3 * (u51j - u51jm1)
|
||||
enddo
|
||||
do j = jst,jend
|
||||
frct(1,i,j,k) = frct(1,i,j,k) + dy1 * ty1 * (rsd(1,i,j -
|
||||
&1,k) - 2.0d+00 * rsd(1,i,j,k) + rsd(1,i,j + 1,k))
|
||||
frct(2,i,j,k) = frct(2,i,j,k) + ty3 * c3 * c4 * (flux(2,j
|
||||
& + 1) - flux(2,j)) + dy2 * ty1 * (rsd(2,i,j - 1,k) - 2.0d+00 * rsd
|
||||
&(2,i,j,k) + rsd(2,i,j + 1,k))
|
||||
frct(3,i,j,k) = frct(3,i,j,k) + ty3 * c3 * c4 * (flux(3,j
|
||||
& + 1) - flux(3,j)) + dy3 * ty1 * (rsd(3,i,j - 1,k) - 2.0d+00 * rsd
|
||||
&(3,i,j,k) + rsd(3,i,j + 1,k))
|
||||
frct(4,i,j,k) = frct(4,i,j,k) + ty3 * c3 * c4 * (flux(4,j
|
||||
& + 1) - flux(4,j)) + dy4 * ty1 * (rsd(4,i,j - 1,k) - 2.0d+00 * rsd
|
||||
&(4,i,j,k) + rsd(4,i,j + 1,k))
|
||||
frct(5,i,j,k) = frct(5,i,j,k) + ty3 * c3 * c4 * (flux(5,j
|
||||
& + 1) - flux(5,j)) + dy5 * ty1 * (rsd(5,i,j - 1,k) - 2.0d+00 * rsd
|
||||
&(5,i,j,k) + rsd(5,i,j + 1,k))
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! fourth-order dissipation
|
||||
!---------------------------------------------------------------------
|
||||
do m = 1,5
|
||||
frct(m,i,2,k) = frct(m,i,2,k) - dssp * ((+(5.0d+00)) * rs
|
||||
&d(m,i,2,k) - 4.0d+00 * rsd(m,i,3,k) + rsd(m,i,4,k))
|
||||
frct(m,i,3,k) = frct(m,i,3,k) - dssp * ((-(4.0d+00)) * rs
|
||||
&d(m,i,2,k) + 6.0d+00 * rsd(m,i,3,k) - 4.0d+00 * rsd(m,i,4,k) + rsd
|
||||
&(m,i,5,k))
|
||||
enddo
|
||||
do j = 4,ny - 3
|
||||
do m = 1,5
|
||||
frct(m,i,j,k) = frct(m,i,j,k) - dssp * (rsd(m,i,j - 2,
|
||||
&k) - 4.0d+00 * rsd(m,i,j - 1,k) + 6.0d+00 * rsd(m,i,j,k) - 4.0d+00
|
||||
& * rsd(m,i,j + 1,k) + rsd(m,i,j + 2,k))
|
||||
enddo
|
||||
enddo
|
||||
do m = 1,5
|
||||
frct(m,i,ny - 2,k) = frct(m,i,ny - 2,k) - dssp * (rsd(m,i
|
||||
&,ny - 4,k) - 4.0d+00 * rsd(m,i,ny - 3,k) + 6.0d+00 * rsd(m,i,ny -
|
||||
&2,k) - 4.0d+00 * rsd(m,i,ny - 1,k))
|
||||
frct(m,i,ny - 1,k) = frct(m,i,ny - 1,k) - dssp * (rsd(m,i
|
||||
&,ny - 3,k) - 4.0d+00 * rsd(m,i,ny - 2,k) + 5.0d+00 * rsd(m,i,ny -
|
||||
&1,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! zeta-direction flux differences
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
|
||||
!DVM$& :: rsd
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
|
||||
!DVM$& :: frct
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r1(iEX0,iEX1,iEX2,iE
|
||||
!DVM$&X3) :: rsd
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r1(iEX0,iEX1,iEX2,iE
|
||||
!DVM$&X3) :: frct
|
||||
!DVM$ PARALLEL (j,i) ON frct(*,i,j,*), PRIVATE (m,i,j,q,tmp,u41,k,u51k,u
|
||||
!DVM$&31km1,u21k,u21km1,u41k,u31k,u51km1,u41km1,flux)
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
do k = 1,nz
|
||||
flux(1,k) = rsd(4,i,j,k)
|
||||
u41 = rsd(4,i,j,k) / rsd(1,i,j,k)
|
||||
q = 0.50d+00 * (rsd(2,i,j,k) * rsd(2,i,j,k) + rsd(3,i,j,k
|
||||
&) * rsd(3,i,j,k) + rsd(4,i,j,k) * rsd(4,i,j,k)) / rsd(1,i,j,k)
|
||||
flux(2,k) = rsd(2,i,j,k) * u41
|
||||
flux(3,k) = rsd(3,i,j,k) * u41
|
||||
flux(4,k) = rsd(4,i,j,k) * u41 + c2 * (rsd(5,i,j,k) - q)
|
||||
flux(5,k) = (c1 * rsd(5,i,j,k) - c2 * q) * u41
|
||||
enddo
|
||||
do k = 2,nz - 1
|
||||
do m = 1,5
|
||||
frct(m,i,j,k) = frct(m,i,j,k) - tz2 * (flux(m,k + 1) -
|
||||
& flux(m,k - 1))
|
||||
enddo
|
||||
enddo
|
||||
do k = 2,nz
|
||||
tmp = 1.0d+00 / rsd(1,i,j,k)
|
||||
u21k = tmp * rsd(2,i,j,k)
|
||||
u31k = tmp * rsd(3,i,j,k)
|
||||
u41k = tmp * rsd(4,i,j,k)
|
||||
u51k = tmp * rsd(5,i,j,k)
|
||||
tmp = 1.0d+00 / rsd(1,i,j,k - 1)
|
||||
u21km1 = tmp * rsd(2,i,j,k - 1)
|
||||
u31km1 = tmp * rsd(3,i,j,k - 1)
|
||||
u41km1 = tmp * rsd(4,i,j,k - 1)
|
||||
u51km1 = tmp * rsd(5,i,j,k - 1)
|
||||
flux(2,k) = tz3 * (u21k - u21km1)
|
||||
flux(3,k) = tz3 * (u31k - u31km1)
|
||||
flux(4,k) = 4.0d+00 / 3.0d+00 * tz3 * (u41k - u41km1)
|
||||
flux(5,k) = 0.50d+00 * (1.0d+00 - c1 * c5) * tz3 * (u21k*
|
||||
&* 2 + u31k** 2 + u41k** 2 - (u21km1** 2 + u31km1** 2 + u41km1** 2)
|
||||
&) + 1.0d+00 / 6.0d+00 * tz3 * (u41k** 2 - u41km1** 2) + c1 * c5 *
|
||||
&tz3 * (u51k - u51km1)
|
||||
enddo
|
||||
do k = 2,nz - 1
|
||||
frct(1,i,j,k) = frct(1,i,j,k) + dz1 * tz1 * (rsd(1,i,j,k
|
||||
&+ 1) - 2.0d+00 * rsd(1,i,j,k) + rsd(1,i,j,k - 1))
|
||||
frct(2,i,j,k) = frct(2,i,j,k) + tz3 * c3 * c4 * (flux(2,k
|
||||
& + 1) - flux(2,k)) + dz2 * tz1 * (rsd(2,i,j,k + 1) - 2.0d+00 * rsd
|
||||
&(2,i,j,k) + rsd(2,i,j,k - 1))
|
||||
frct(3,i,j,k) = frct(3,i,j,k) + tz3 * c3 * c4 * (flux(3,k
|
||||
& + 1) - flux(3,k)) + dz3 * tz1 * (rsd(3,i,j,k + 1) - 2.0d+00 * rsd
|
||||
&(3,i,j,k) + rsd(3,i,j,k - 1))
|
||||
frct(4,i,j,k) = frct(4,i,j,k) + tz3 * c3 * c4 * (flux(4,k
|
||||
& + 1) - flux(4,k)) + dz4 * tz1 * (rsd(4,i,j,k + 1) - 2.0d+00 * rsd
|
||||
&(4,i,j,k) + rsd(4,i,j,k - 1))
|
||||
frct(5,i,j,k) = frct(5,i,j,k) + tz3 * c3 * c4 * (flux(5,k
|
||||
& + 1) - flux(5,k)) + dz5 * tz1 * (rsd(5,i,j,k + 1) - 2.0d+00 * rsd
|
||||
&(5,i,j,k) + rsd(5,i,j,k - 1))
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! fourth-order dissipation
|
||||
!---------------------------------------------------------------------
|
||||
do m = 1,5
|
||||
frct(m,i,j,2) = frct(m,i,j,2) - dssp * ((+(5.0d+00)) * rs
|
||||
&d(m,i,j,2) - 4.0d+00 * rsd(m,i,j,3) + rsd(m,i,j,4))
|
||||
frct(m,i,j,3) = frct(m,i,j,3) - dssp * ((-(4.0d+00)) * rs
|
||||
&d(m,i,j,2) + 6.0d+00 * rsd(m,i,j,3) - 4.0d+00 * rsd(m,i,j,4) + rsd
|
||||
&(m,i,j,5))
|
||||
enddo
|
||||
do k = 4,nz - 3
|
||||
do m = 1,5
|
||||
frct(m,i,j,k) = frct(m,i,j,k) - dssp * (rsd(m,i,j,k -
|
||||
&2) - 4.0d+00 * rsd(m,i,j,k - 1) + 6.0d+00 * rsd(m,i,j,k) - 4.0d+00
|
||||
& * rsd(m,i,j,k + 1) + rsd(m,i,j,k + 2))
|
||||
enddo
|
||||
enddo
|
||||
do m = 1,5
|
||||
frct(m,i,j,nz - 2) = frct(m,i,j,nz - 2) - dssp * (rsd(m,i
|
||||
&,j,nz - 4) - 4.0d+00 * rsd(m,i,j,nz - 3) + 6.0d+00 * rsd(m,i,j,nz
|
||||
&- 2) - 4.0d+00 * rsd(m,i,j,nz - 1))
|
||||
frct(m,i,j,nz - 1) = frct(m,i,j,nz - 1) - dssp * (rsd(m,i
|
||||
&,j,nz - 3) - 4.0d+00 * rsd(m,i,j,nz - 2) + 5.0d+00 * rsd(m,i,j,nz
|
||||
&- 1))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
|
||||
!DVM$& :: rsd
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
|
||||
!DVM$& :: frct
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,77 @@
|
||||
|
||||
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine error ()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
! compute the solution error
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! local variables
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
include 'applu.incl'
|
||||
!---------------------------------------------------------------------
|
||||
integer i,j,k,m
|
||||
double precision tmp
|
||||
double precision u000ijk(5)
|
||||
do m = 1,5
|
||||
errnm(m) = 0.0d+00
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,j,i) ON u(*,i,j,k), PRIVATE (tmp,m,k,u000ijk,i,j),REDU
|
||||
!DVM$&CTION (sum (errnm))
|
||||
do k = 2,nz - 1
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
call exact(i,j,k,u000ijk)
|
||||
do m = 1,5
|
||||
tmp = u000ijk(m) - u(m,i,j,k)
|
||||
errnm(m) = errnm(m) + tmp** 2
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
do m = 1,5
|
||||
errnm(m) = sqrt (errnm(m) / ((nx0 - 2) * (ny0 - 2) * (nz0 - 2))
|
||||
&)
|
||||
enddo
|
||||
|
||||
! write (*,1002) ( errnm(m), m = 1, 5 )
|
||||
1002 format (1x/1x,'RMS-norm of error in soln. to ', 'first pde = ',1p
|
||||
&e12.5/, 1x,'RMS-norm of error in soln. to ', 'second pde = ',1pe12
|
||||
&.5/, 1x,'RMS-norm of error in soln. to ', 'third pde = ',1pe12.5/
|
||||
&, 1x,'RMS-norm of error in soln. to ', 'fourth pde = ',1pe12.5/, 1
|
||||
&x,'RMS-norm of error in soln. to ', 'fifth pde = ',1pe12.5)
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,64 @@
|
||||
|
||||
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine exact (i, j, k, u000ijk)
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
! compute the exact solution at (i,j,k)
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! input parameters
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
include 'applu.incl'
|
||||
!---------------------------------------------------------------------
|
||||
integer i,j,k
|
||||
double precision u000ijk(*)
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! local variables
|
||||
!---------------------------------------------------------------------
|
||||
integer m
|
||||
double precision xi,eta,zeta
|
||||
xi = dble (i - 1) / (nx0 - 1)
|
||||
eta = dble (j - 1) / (ny0 - 1)
|
||||
zeta = dble (k - 1) / (nz - 1)
|
||||
do m = 1,5
|
||||
u000ijk(m) = ce(m,1) + (ce(m,2) + (ce(m,5) + (ce(m,8) + ce(m,11
|
||||
&) * xi) * xi) * xi) * xi + (ce(m,3) + (ce(m,6) + (ce(m,9) + ce(m,1
|
||||
&2) * eta) * eta) * eta) * eta + (ce(m,4) + (ce(m,7) + (ce(m,10) +
|
||||
&ce(m,13) * zeta) * zeta) * zeta) * zeta
|
||||
enddo
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,69 @@
|
||||
|
||||
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine l2norm (ldx, ldy, ldz, nx0, ny0, nz0, ist, iend, jst, j
|
||||
&end, v, sum)
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! to compute the l2-norm of vector v.
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! input parameters
|
||||
!---------------------------------------------------------------------
|
||||
integer ldx,ldy,ldz
|
||||
integer nx0,ny0,nz0
|
||||
integer ist,iend
|
||||
integer jst,jend
|
||||
!DVM$ INHERIT v
|
||||
!DVM$ DYNAMIC v
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! To improve cache performance, second two dimensions padded by 1
|
||||
! for even number sizes only. Only needed in v.
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ TEMPLATE, COMMON :: dvmh_temp0(1:6,0:163,0:163,-1:163)
|
||||
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r1(1:6,0:163,0:163,-1:163)
|
||||
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r2(1:6,0:163,0:163,-1:163)
|
||||
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r3(1:6,0:163,0:163,-1:163)
|
||||
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r0(1:6,0:163,0:163,-1:163)
|
||||
!DVM$ DISTRIBUTE dvmh_temp0(*,BLOCK,BLOCK,BLOCK)
|
||||
!DVM$ DISTRIBUTE dvmh_temp0_r1(*,BLOCK,BLOCK,*)
|
||||
!DVM$ DISTRIBUTE dvmh_temp0_r2(*,BLOCK,*,BLOCK)
|
||||
!DVM$ DISTRIBUTE dvmh_temp0_r3(*,*,BLOCK,BLOCK)
|
||||
!DVM$ DISTRIBUTE dvmh_temp0_r0(*,*,*,BLOCK)
|
||||
!DVM$ DYNAMIC dvmh_temp0, dvmh_temp0_r1, dvmh_temp0_r2, dvmh_temp0_r3,
|
||||
!DVM$&dvmh_temp0_r0
|
||||
double precision v(5,ldx / 2 * 2 + 1,ldy / 2 * 2 + 1,*),sum(5)
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! local variables
|
||||
!---------------------------------------------------------------------
|
||||
integer i,j,k,m
|
||||
do m = 1,5
|
||||
sum(m) = 0.0d+00
|
||||
enddo
|
||||
!DVM$ region
|
||||
!DVM$ PARALLEL (k,j,i,m) ON v(m,i,j,k), PRIVATE (m,j,i,k),REDUCTION (sum
|
||||
!DVM$& (sum))
|
||||
do k = 2,nz0 - 1
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
do m = 1,5
|
||||
sum(m) = sum(m) + v(m,i,j,k) * v(m,i,j,k)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ end region
|
||||
do m = 1,5
|
||||
sum(m) = sqrt (sum(m) / ((nx0 - 2) * (ny0 - 2) * (nz0 - 2)))
|
||||
enddo
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,212 @@
|
||||
|
||||
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
|
||||
|
||||
!-------------------------------------------------------------------------!
|
||||
! !
|
||||
! N A S P A R A L L E L B E N C H M A R K S 3.3 !
|
||||
! !
|
||||
! S E R I A L V E R S I O N !
|
||||
! !
|
||||
! L U !
|
||||
! !
|
||||
!-------------------------------------------------------------------------!
|
||||
! !
|
||||
! This benchmark is a serial version of the NPB LU code. !
|
||||
! Refer to NAS Technical Reports 95-020 for details. !
|
||||
! !
|
||||
! Permission to use, copy, distribute and modify this software !
|
||||
! for any purpose with or without fee is hereby granted. We !
|
||||
! request, however, that all derived work reference the NAS !
|
||||
! Parallel Benchmarks 3.3. This software is provided "as is" !
|
||||
! without express or implied warranty. !
|
||||
! !
|
||||
! Information on NPB 3.3, including the technical report, the !
|
||||
! original specifications, source code, results and information !
|
||||
! on how to submit new results, is available at: !
|
||||
! !
|
||||
! http://www.nas.nasa.gov/Software/NPB/ !
|
||||
! !
|
||||
! Send comments or suggestions to npb@nas.nasa.gov !
|
||||
! !
|
||||
! NAS Parallel Benchmarks Group !
|
||||
! NASA Ames Research Center !
|
||||
! Mail Stop: T27A-1 !
|
||||
! Moffett Field, CA 94035-1000 !
|
||||
! !
|
||||
! E-mail: npb@nas.nasa.gov !
|
||||
! Fax: (650) 604-3957 !
|
||||
! !
|
||||
!-------------------------------------------------------------------------!
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
! Authors: S. Weeratunga
|
||||
! V. Venkatakrishnan
|
||||
! E. Barszcz
|
||||
! M. Yarrow
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
program applu
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
! driver for the performance evaluation of the solver for
|
||||
! five coupled parabolic/elliptic partial differential equations.
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
include 'applu.incl'
|
||||
!---------------------------------------------------------------------
|
||||
character class
|
||||
logical verified
|
||||
double precision mflops
|
||||
double precision t,tmax,timer_read,trecs(t_last)
|
||||
external timer_read
|
||||
integer i,fstatus
|
||||
character t_names(t_last)*8
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Setup info for timers
|
||||
!---------------------------------------------------------------------
|
||||
open (unit = 2,file = 'timer.flag',status = 'old',iostat = fstatus
|
||||
&)
|
||||
if (fstatus .eq. 0) then
|
||||
timeron = .TRUE.
|
||||
t_names(t_total) = 'total'
|
||||
t_names(t_rhsx) = 'rhsx'
|
||||
t_names(t_rhsy) = 'rhsy'
|
||||
t_names(t_rhsz) = 'rhsz'
|
||||
t_names(t_rhs) = 'rhs'
|
||||
t_names(t_jacld) = 'jacld'
|
||||
t_names(t_blts) = 'blts'
|
||||
t_names(t_jacu) = 'jacu'
|
||||
t_names(t_buts) = 'buts'
|
||||
t_names(t_add) = 'add'
|
||||
t_names(t_l2norm) = 'l2norm'
|
||||
close (unit = 2)
|
||||
else
|
||||
timeron = .FALSE.
|
||||
endif
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! read input data
|
||||
!---------------------------------------------------------------------
|
||||
call read_input()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! set up domain sizes
|
||||
!---------------------------------------------------------------------
|
||||
call domain()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! set up coefficients
|
||||
!---------------------------------------------------------------------
|
||||
call setcoeff()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! set the boundary values for dependent variables
|
||||
!---------------------------------------------------------------------
|
||||
call setbv()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! set the initial values for dependent variables
|
||||
!---------------------------------------------------------------------
|
||||
call setiv()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! compute the forcing term based on prescribed exact solution
|
||||
!---------------------------------------------------------------------
|
||||
call erhs()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! perform one SSOR iteration to touch all pages
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ actual()
|
||||
call ssor(1)
|
||||
!DVM$ get_actual()
|
||||
!---------------------------------------------------------------------
|
||||
! reset the boundary and initial values
|
||||
!---------------------------------------------------------------------
|
||||
call setbv()
|
||||
call setiv()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! perform the SSOR iterations
|
||||
!--------------------------------------------------------------------
|
||||
!DVM$ interval 1
|
||||
!DVM$ actual()
|
||||
call ssor(itmax)
|
||||
!DVM$ get_actual()
|
||||
!DVM$ end interval
|
||||
!---------------------------------------------------------------------
|
||||
! compute the solution error
|
||||
!---------------------------------------------------------------------
|
||||
call error()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! compute the surface integral
|
||||
!---------------------------------------------------------------------
|
||||
call pintgr()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! verification test
|
||||
!---------------------------------------------------------------------
|
||||
call verify(rsdnm,errnm,frc,class,verified)
|
||||
mflops = float (itmax) * (1984.77 * float (nx0) * float (ny0) * fl
|
||||
&oat (nz0) - 10923.3 * (float (nx0 + ny0 + nz0) / 3.)** 2 + 27770.9
|
||||
& * float (nx0 + ny0 + nz0) / 3. - 144010.) / (maxtime * 1000000.)
|
||||
call print_results('LU',class,nx0,ny0,nz0,itmax,maxtime,mflops,'
|
||||
& floating point',verified,npbversion,compiletime,cs1,cs2,cs
|
||||
&3,cs4,cs5,cs6,'(none)')
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! More timers
|
||||
!---------------------------------------------------------------------
|
||||
if (.not.(timeron)) goto 999
|
||||
do i = 1,t_last
|
||||
trecs(i) = timer_read (i)
|
||||
enddo
|
||||
tmax = maxtime
|
||||
if (tmax .eq. 0.) tmax = 1.0
|
||||
write (unit = *,fmt = 800)
|
||||
800 format(' SECTION Time (secs)')
|
||||
do i = 1,t_last
|
||||
write (unit = *,fmt = 810) t_names(i),trecs(i),trecs(i) * 100.
|
||||
&/ tmax
|
||||
if (i .eq. t_rhs) then
|
||||
t = trecs(t_rhsx) + trecs(t_rhsy) + trecs(t_rhsz)
|
||||
write (unit = *,fmt = 820) 'sub-rhs',t,t * 100. / tmax
|
||||
t = trecs(i) - t
|
||||
write (unit = *,fmt = 820) 'rest-rhs',t,t * 100. / tmax
|
||||
endif
|
||||
810 format(2x,a8,':',f9.3,' (',f6.2,'%)')
|
||||
820 format(5x,'--> ',a8,':',f9.3,' (',f6.2,'%)')
|
||||
enddo
|
||||
999 continue
|
||||
end
|
||||
|
||||
@@ -0,0 +1,12 @@
|
||||
@echo off
|
||||
|
||||
set CLASS=%1
|
||||
set OPT=%2
|
||||
|
||||
CALL ..\sys\setparams LU %CLASS%
|
||||
CALL %F77% %OPT% -f90 lu 1>out_%CLASS%.txt 2>err_%CLASS%.txt
|
||||
if exist lu.exe (
|
||||
copy lu.exe %BIN%\lu.%CLASS%.x.exe
|
||||
del lu.exe
|
||||
)
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,187 @@
|
||||
|
||||
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine pintgr ()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! local variables
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
include 'applu.incl'
|
||||
!---------------------------------------------------------------------
|
||||
integer i,j,k
|
||||
integer ibeg,ifin,ifin1
|
||||
integer jbeg,jfin,jfin1
|
||||
|
||||
!DVM$ ALIGN phi1(iEX1,iEX2) WITH dvmh_temp0(*,iEX1,*,iEX2)
|
||||
!DVM$ ALIGN phi2(iEX1,iEX2) WITH dvmh_temp0(*,iEX1,*,iEX2)
|
||||
!DVM$ DYNAMIC phi1,phi2
|
||||
double precision phi1(0:isiz2 + 1,0:isiz3 + 1),phi2(0:isiz2 + 1,0
|
||||
&:isiz3 + 1)
|
||||
!DVM$ SHADOW phi2( 0:1,0:1 )
|
||||
!DVM$ SHADOW phi1( 0:1,0:1 )
|
||||
double precision frc1,frc2,frc3
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! set up the sub-domains for integeration in each processor
|
||||
!---------------------------------------------------------------------
|
||||
ibeg = ii1
|
||||
ifin = ii2
|
||||
jbeg = ji1
|
||||
jfin = ji2
|
||||
ifin1 = ifin - 1
|
||||
jfin1 = jfin - 1
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! initialize
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ PARALLEL (i,k) ON phi1(i,k), PRIVATE (k,i)
|
||||
do i = 0,isiz2 + 1
|
||||
do k = 0,isiz3 + 1
|
||||
phi1(i,k) = 0.
|
||||
phi2(i,k) = 0.
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (j,i) ON phi1(i,j), REMOTE_ACCESS (u(:,:,:,:)),PRIVATE (k
|
||||
!DVM$&,j,i)
|
||||
do j = jbeg,jfin
|
||||
do i = ibeg,ifin
|
||||
k = ki1
|
||||
phi1(i,j) = c2 * (u(5,i,j,k) - 0.50d+00 * (u(2,i,j,k)** 2 +
|
||||
&u(3,i,j,k)** 2 + u(4,i,j,k)** 2) / u(1,i,j,k))
|
||||
k = ki2
|
||||
phi2(i,j) = c2 * (u(5,i,j,k) - 0.50d+00 * (u(2,i,j,k)** 2 +
|
||||
&u(3,i,j,k)** 2 + u(4,i,j,k)** 2) / u(1,i,j,k))
|
||||
enddo
|
||||
enddo
|
||||
frc1 = 0.0d+00
|
||||
!DVM$ PARALLEL (j,i) ON phi1(i,j), PRIVATE (j,i),SHADOW_RENEW (phi1(CORN
|
||||
!DVM$&ER),phi2(CORNER)),REDUCTION (sum (frc1))
|
||||
do j = jbeg,jfin1
|
||||
do i = ibeg,ifin1
|
||||
frc1 = frc1 + (phi1(i,j) + phi1(i + 1,j) + phi1(i,j + 1) + p
|
||||
&hi1(i + 1,j + 1) + phi2(i,j) + phi2(i + 1,j) + phi2(i,j + 1) + phi
|
||||
&2(i + 1,j + 1))
|
||||
enddo
|
||||
enddo
|
||||
frc1 = dxi * deta * frc1
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! initialize
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ PARALLEL (i,k) ON phi1(i,k), PRIVATE (k,i)
|
||||
do i = 0,isiz2 + 1
|
||||
do k = 0,isiz3 + 1
|
||||
phi1(i,k) = 0.
|
||||
phi2(i,k) = 0.
|
||||
enddo
|
||||
enddo
|
||||
if (jbeg .eq. ji1) then
|
||||
!DVM$ PARALLEL (k,i) ON phi1(i,k), REMOTE_ACCESS (u(:,:,:,:)),PRIVATE
|
||||
!DVM$& (k,i)
|
||||
do k = ki1,ki2
|
||||
do i = ibeg,ifin
|
||||
phi1(i,k) = c2 * (u(5,i,jbeg,k) - 0.50d+00 * (u(2,i,jbeg,
|
||||
&k)** 2 + u(3,i,jbeg,k)** 2 + u(4,i,jbeg,k)** 2) / u(1,i,jbeg,k))
|
||||
enddo
|
||||
enddo
|
||||
endif
|
||||
if (jfin .eq. ji2) then
|
||||
!DVM$ PARALLEL (k,i) ON phi2(i,k), REMOTE_ACCESS (u(:,:,:,:)),PRIVATE
|
||||
!DVM$& (k,i)
|
||||
do k = ki1,ki2
|
||||
do i = ibeg,ifin
|
||||
phi2(i,k) = c2 * (u(5,i,jfin,k) - 0.50d+00 * (u(2,i,jfin,
|
||||
&k)** 2 + u(3,i,jfin,k)** 2 + u(4,i,jfin,k)** 2) / u(1,i,jfin,k))
|
||||
enddo
|
||||
enddo
|
||||
endif
|
||||
frc2 = 0.0d+00
|
||||
!DVM$ PARALLEL (k,i) ON phi1(i,k), PRIVATE (k,i),SHADOW_RENEW (phi1(CORN
|
||||
!DVM$&ER),phi2(CORNER)),REDUCTION (sum (frc2))
|
||||
do k = ki1,ki2 - 1
|
||||
do i = ibeg,ifin1
|
||||
frc2 = frc2 + (phi1(i,k) + phi1(i + 1,k) + phi1(i,k + 1) + p
|
||||
&hi1(i + 1,k + 1) + phi2(i,k) + phi2(i + 1,k) + phi2(i,k + 1) + phi
|
||||
&2(i + 1,k + 1))
|
||||
enddo
|
||||
enddo
|
||||
frc2 = dxi * dzeta * frc2
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! initialize
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ PARALLEL (i,k) ON phi1(i,k), PRIVATE (k,i)
|
||||
do i = 0,isiz2 + 1
|
||||
do k = 0,isiz3 + 1
|
||||
phi1(i,k) = 0.
|
||||
phi2(i,k) = 0.
|
||||
enddo
|
||||
enddo
|
||||
if (ibeg .eq. ii1) then
|
||||
!DVM$ PARALLEL (k,j) ON phi1(j,k), REMOTE_ACCESS (u(:,:,:,:)),PRIVATE
|
||||
!DVM$& (k,j)
|
||||
do k = ki1,ki2
|
||||
do j = jbeg,jfin
|
||||
phi1(j,k) = c2 * (u(5,ibeg,j,k) - 0.50d+00 * (u(2,ibeg,j,
|
||||
&k)** 2 + u(3,ibeg,j,k)** 2 + u(4,ibeg,j,k)** 2) / u(1,ibeg,j,k))
|
||||
enddo
|
||||
enddo
|
||||
endif
|
||||
if (ifin .eq. ii2) then
|
||||
!DVM$ PARALLEL (k,j) ON phi2(j,k), REMOTE_ACCESS (u(:,:,:,:)),PRIVATE
|
||||
!DVM$& (k,j)
|
||||
do k = ki1,ki2
|
||||
do j = jbeg,jfin
|
||||
phi2(j,k) = c2 * (u(5,ifin,j,k) - 0.50d+00 * (u(2,ifin,j,
|
||||
&k)** 2 + u(3,ifin,j,k)** 2 + u(4,ifin,j,k)** 2) / u(1,ifin,j,k))
|
||||
enddo
|
||||
enddo
|
||||
endif
|
||||
frc3 = 0.0d+00
|
||||
!DVM$ PARALLEL (k,j) ON phi1(j,k), PRIVATE (k,j),SHADOW_RENEW (phi1(CORN
|
||||
!DVM$&ER),phi2(CORNER)),REDUCTION (sum (frc3))
|
||||
do k = ki1,ki2 - 1
|
||||
do j = jbeg,jfin1
|
||||
frc3 = frc3 + (phi1(j,k) + phi1(j + 1,k) + phi1(j,k + 1) + p
|
||||
&hi1(j + 1,k + 1) + phi2(j,k) + phi2(j + 1,k) + phi2(j,k + 1) + phi
|
||||
&2(j + 1,k + 1))
|
||||
enddo
|
||||
enddo
|
||||
frc3 = deta * dzeta * frc3
|
||||
frc = 0.25d+00 * (frc1 + frc2 + frc3)
|
||||
|
||||
! write (*,1001) frc
|
||||
return
|
||||
|
||||
! 1001 format (//5x,'surface integral = ',1pe12.5//)
|
||||
end
|
||||
|
||||
@@ -0,0 +1,111 @@
|
||||
|
||||
subroutine print_results(name, class, n1, n2, n3, niter,
|
||||
> t, mops, optype, verified, npbversion,
|
||||
> compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7)
|
||||
|
||||
implicit none
|
||||
character name*(*)
|
||||
character class*1
|
||||
integer n1, n2, n3, niter, j
|
||||
double precision t, mops
|
||||
character optype*24, size*15
|
||||
logical verified
|
||||
character*(*) npbversion, compiletime,
|
||||
> cs1, cs2, cs3, cs4, cs5, cs6, cs7
|
||||
|
||||
write (*, 2) name
|
||||
2 format(//, ' ', A, ' Benchmark Completed.')
|
||||
|
||||
write (*, 3) Class
|
||||
3 format(' Class = ', 12x, a12)
|
||||
|
||||
c If this is not a grid-based problem (EP, FT, CG), then
|
||||
c we only print n1, which contains some measure of the
|
||||
c problem size. In that case, n2 and n3 are both zero.
|
||||
c Otherwise, we print the grid size n1xn2xn3
|
||||
|
||||
if ((n2 .eq. 0) .and. (n3 .eq. 0)) then
|
||||
if (name(1:2) .eq. 'EP') then
|
||||
write(size, '(f15.0)' ) 2.d0**n1
|
||||
j = 15
|
||||
if (size(j:j) .eq. '.') then
|
||||
size(j:j) = ' '
|
||||
j = j - 1
|
||||
endif
|
||||
write (*,42) size(1:j)
|
||||
42 format(' Size = ',9x, a15)
|
||||
else
|
||||
write (*,44) n1
|
||||
44 format(' Size = ',12x, i12)
|
||||
endif
|
||||
else
|
||||
write (*, 4) n1,n2,n3
|
||||
4 format(' Size = ',9x, i4,'x',i4,'x',i4)
|
||||
endif
|
||||
|
||||
write (*, 5) niter
|
||||
5 format(' Iterations = ', 12x, i12)
|
||||
|
||||
write (*, 6) t
|
||||
6 format(' Time in seconds = ',12x, f12.2)
|
||||
|
||||
write (*,9) mops
|
||||
9 format(' Mop/s total = ',12x, f12.2)
|
||||
|
||||
write(*, 11) optype
|
||||
11 format(' Operation type = ', a24)
|
||||
|
||||
if (verified) then
|
||||
write(*,12) ' SUCCESSFUL'
|
||||
else
|
||||
write(*,12) 'UNSUCCESSFUL'
|
||||
endif
|
||||
12 format(' Verification = ', 12x, a)
|
||||
|
||||
write(*,13) npbversion
|
||||
13 format(' Version = ', 12x, a12)
|
||||
|
||||
write(*,14) compiletime
|
||||
14 format(' Compile date = ', 12x, a12)
|
||||
|
||||
|
||||
write (*,121) cs1
|
||||
121 format(/, ' Compile options:', /,
|
||||
> ' F77 = ', A)
|
||||
|
||||
write (*,122) cs2
|
||||
122 format(' FLINK = ', A)
|
||||
|
||||
write (*,123) cs3
|
||||
123 format(' F_LIB = ', A)
|
||||
|
||||
write (*,124) cs4
|
||||
124 format(' F_INC = ', A)
|
||||
|
||||
write (*,125) cs5
|
||||
125 format(' FFLAGS = ', A)
|
||||
|
||||
write (*,126) cs6
|
||||
126 format(' FLINKFLAGS = ', A)
|
||||
|
||||
write(*, 127) cs7
|
||||
127 format(' RAND = ', A)
|
||||
|
||||
write (*,130)
|
||||
130 format(//' Please send all errors/feedbacks to:'//
|
||||
> ' NPB Development Team'/
|
||||
> ' npb@nas.nasa.gov'//)
|
||||
c 130 format(//' Please send the results of this run to:'//
|
||||
c > ' NPB Development Team '/
|
||||
c > ' Internet: npb@nas.nasa.gov'/
|
||||
c > ' '/
|
||||
c > ' If email is not available, send this to:'//
|
||||
c > ' MS T27A-1'/
|
||||
c > ' NASA Ames Research Center'/
|
||||
c > ' Moffett Field, CA 94035-1000'//
|
||||
c > ' Fax: 650-604-3957'//)
|
||||
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,115 @@
|
||||
|
||||
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine read_input ()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
include 'applu.incl'
|
||||
!---------------------------------------------------------------------
|
||||
integer fstatus
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! if input file does not exist, it uses defaults
|
||||
! ipr = 1 for detailed progress output
|
||||
! inorm = how often the norm is printed (once every inorm iterations)
|
||||
! itmax = number of pseudo time steps
|
||||
! dt = time step
|
||||
! omega 1 over-relaxation factor for SSOR
|
||||
! tolrsd = steady state residual tolerance levels
|
||||
! nx, ny, nz = number of grid points in x, y, z directions
|
||||
!---------------------------------------------------------------------
|
||||
write (unit = *,fmt = 1000)
|
||||
open (unit = 3,file = 'inputlu.data',status = 'old',access = 'sequ
|
||||
&ential',form = 'formatted',iostat = fstatus)
|
||||
if (fstatus .eq. 0) then
|
||||
write (unit = *,fmt = *) 'Reading from input file inputlu.data'
|
||||
read (unit = 3,fmt = *)
|
||||
read (unit = 3,fmt = *)
|
||||
read (unit = 3,fmt = *) ipr,inorm
|
||||
read (unit = 3,fmt = *)
|
||||
read (unit = 3,fmt = *)
|
||||
read (unit = 3,fmt = *) itmax
|
||||
read (unit = 3,fmt = *)
|
||||
read (unit = 3,fmt = *)
|
||||
read (unit = 3,fmt = *) dt
|
||||
read (unit = 3,fmt = *)
|
||||
read (unit = 3,fmt = *)
|
||||
read (unit = 3,fmt = *) omega
|
||||
read (unit = 3,fmt = *)
|
||||
read (unit = 3,fmt = *)
|
||||
read (unit = 3,fmt = *) tolrsd(1),tolrsd(2),tolrsd(3),tolrsd(4)
|
||||
&,tolrsd(5)
|
||||
read (unit = 3,fmt = *)
|
||||
read (unit = 3,fmt = *)
|
||||
read (unit = 3,fmt = *) nx0,ny0,nz0
|
||||
close (unit = 3)
|
||||
else
|
||||
ipr = ipr_default
|
||||
inorm = inorm_default
|
||||
itmax = itmax_default
|
||||
dt = dt_default
|
||||
omega = omega_default
|
||||
tolrsd(1) = tolrsd1_def
|
||||
tolrsd(2) = tolrsd2_def
|
||||
tolrsd(3) = tolrsd3_def
|
||||
tolrsd(4) = tolrsd4_def
|
||||
tolrsd(5) = tolrsd5_def
|
||||
nx0 = isiz1
|
||||
ny0 = isiz2
|
||||
nz0 = isiz3
|
||||
endif
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! check problem size
|
||||
!---------------------------------------------------------------------
|
||||
if (nx0 .lt. 4 .or. ny0 .lt. 4 .or. nz0 .lt. 4) then
|
||||
write (unit = *,fmt = 2001)
|
||||
2001 format (5x,'PROBLEM SIZE IS TOO SMALL - ', /
|
||||
&5x,'SET EACH OF NX, NY AND NZ AT LEAST EQUAL TO 5')
|
||||
stop
|
||||
endif
|
||||
if (nx0 .gt. isiz1 .or. ny0 .gt. isiz2 .or. nz0 .gt. isiz3) then
|
||||
write (unit = *,fmt = 2002)
|
||||
2002 format (5x,'PROBLEM SIZE IS TOO LARGE - ', /
|
||||
&5x,'NX, NY AND NZ SHOULD BE EQUAL TO ', /5x,'ISIZ1, ISIZ
|
||||
&2 AND ISIZ3 RESPECTIVELY')
|
||||
stop
|
||||
endif
|
||||
write (unit = *,fmt = 1001) nx0,ny0,nz0
|
||||
write (unit = *,fmt = 1002) itmax
|
||||
write (unit = *,fmt = *)
|
||||
1000 format(//,' NAS Parallel Benchmarks (NPB3.3-SER)', ' - LU
|
||||
& Benchmark', /)
|
||||
1001 format(' Size: ', i4, 'x', i4, 'x', i4)
|
||||
1002 format(' Iterations: ', i4)
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,420 @@
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine rhs
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c compute the right hand sides
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
|
||||
include 'applu.incl'
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c local variables
|
||||
c---------------------------------------------------------------------
|
||||
integer i, j, k, m, p
|
||||
double precision q
|
||||
double precision tmp, utmp(6,isiz3), rtmp(5,isiz3)
|
||||
double precision u21, u31, u41
|
||||
double precision u21i, u31i, u41i, u51i
|
||||
double precision u21j, u31j, u41j, u51j
|
||||
double precision u21k, u31k, u41k, u51k
|
||||
double precision u21im1, u31im1, u41im1, u51im1
|
||||
double precision u21jm1, u31jm1, u41jm1, u51jm1
|
||||
double precision u21km1, u31km1, u41km1, u51km1
|
||||
double precision flu(5,-1:1)
|
||||
|
||||
|
||||
if (timeron) call timer_start(t_rhs)
|
||||
!DVM$ region
|
||||
!DVM$ PARALLEL (k,j,i) ON qs(i,j,k), PRIVATE (j,i,tmp,k,m)
|
||||
!DVM$&,SHADOW_COMPUTE,SHADOW_RENEW (u)
|
||||
do k = 1, nz
|
||||
do j = 1, ny
|
||||
do i = 1, nx
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,k) = - frct(m,i,j,k)
|
||||
end do
|
||||
tmp = 1.0d+00 / u(1,i,j,k)
|
||||
qs(i,j,k) = 0.50d+00 * ( u(2,i,j,k) * u(2,i,j,k)
|
||||
> + u(3,i,j,k) * u(3,i,j,k)
|
||||
> + u(4,i,j,k) * u(4,i,j,k) )
|
||||
> * tmp
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
! if (timeron) call timer_start(t_rhsx)
|
||||
!DVM$ PARALLEL (k,j,i) on rsd(*,i,j,k),
|
||||
!DVM$&PRIVATE(p, u21, q, m, tmp, u21i, u31i, u41i, u51i, u21im1,
|
||||
!DVM$&u31im1, u41im1, u51im1, u31, u21j, u31j, u41j, u51j, u21jm1,
|
||||
!DVM$&u41jm1, u51jm1, u41, u21k, u31k, u41k, u51k, u21km1, u31km1,
|
||||
!DVM$&u51km1, u31jm1,u41km1,flu), cuda_block (32,4)
|
||||
do k = 2, nz - 1
|
||||
do j = jst, jend
|
||||
do i = ist, iend
|
||||
do p = -1, 1, 2
|
||||
flu(1,p) = u(2,i+p,j,k)
|
||||
u21 = u(2,i+p,j,k) / u(1,i+p,j,k)
|
||||
|
||||
q = qs(i+p,j,k)
|
||||
|
||||
flu(2,p) = u(2,i+p,j,k) * u21 + c2 *
|
||||
> ( u(5,i+p,j,k) - q )
|
||||
flu(3,p) = u(3,i+p,j,k) * u21
|
||||
flu(4,p) = u(4,i+p,j,k) * u21
|
||||
flu(5,p) = ( c1 * u(5,i+p,j,k) - c2 * q ) * u21
|
||||
end do
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k)
|
||||
> - tx2 * ( flu(m,1) - flu(m,-1) )
|
||||
end do
|
||||
|
||||
do p = 0, 1
|
||||
tmp = 1.0d+00/ u(1,i+p,j,k)
|
||||
|
||||
u21i = tmp * u(2,i+p,j,k)
|
||||
u31i = tmp * u(3,i+p,j,k)
|
||||
u41i = tmp * u(4,i+p,j,k)
|
||||
u51i = tmp * u(5,i+p,j,k)
|
||||
|
||||
tmp = 1.0d+00/ u(1,i-1+p,j,k)
|
||||
|
||||
u21im1 = tmp * u(2,i-1+p,j,k)
|
||||
u31im1 = tmp * u(3,i-1+p,j,k)
|
||||
u41im1 = tmp * u(4,i-1+p,j,k)
|
||||
u51im1 = tmp * u(5,i-1+p,j,k)
|
||||
|
||||
flu(2,p) = (4.0d+00/3.0d+00) * tx3 * (u21i-u21im1)
|
||||
flu(3,p) = tx3 * ( u31i - u31im1 )
|
||||
flu(4,p) = tx3 * ( u41i - u41im1 )
|
||||
flu(5,p) = 0.50d+00 * ( 1.0d+00 - c1*c5)
|
||||
> * tx3 * ( ( u21i **2 + u31i **2+u41i **2)
|
||||
> - ( u21im1**2 + u31im1**2+u41im1**2))
|
||||
> + (1.0d+00/6.0d+00)
|
||||
> * tx3 * ( u21i**2 - u21im1**2 )
|
||||
> + c1 * c5 * tx3 * ( u51i - u51im1 )
|
||||
enddo
|
||||
|
||||
rsd(1,i,j,k) = rsd(1,i,j,k)
|
||||
> + dx1 * tx1 * ( u(1,i-1,j,k)
|
||||
> - 2.0d+00 * u(1,i,j,k)
|
||||
> + u(1,i+1,j,k) )
|
||||
rsd(2,i,j,k) = rsd(2,i,j,k)
|
||||
> + tx3 * c3 * c4 * ( flu(2,1) - flu(2,0) )
|
||||
> + dx2 * tx1 * ( u(2,i-1,j,k)
|
||||
> - 2.0d+00 * u(2,i,j,k)
|
||||
> + u(2,i+1,j,k) )
|
||||
rsd(3,i,j,k) = rsd(3,i,j,k)
|
||||
> + tx3 * c3 * c4 * ( flu(3,1) - flu(3,0) )
|
||||
> + dx3 * tx1 * ( u(3,i-1,j,k)
|
||||
> - 2.0d+00 * u(3,i,j,k)
|
||||
> + u(3,i+1,j,k) )
|
||||
rsd(4,i,j,k) = rsd(4,i,j,k)
|
||||
> + tx3 * c3 * c4 * ( flu(4,1) - flu(4,0) )
|
||||
> + dx4 * tx1 * ( u(4,i-1,j,k)
|
||||
> - 2.0d+00 * u(4,i,j,k)
|
||||
> + u(4,i+1,j,k) )
|
||||
rsd(5,i,j,k) = rsd(5,i,j,k)
|
||||
> + tx3 * c3 * c4 * ( flu(5,1) - flu(5,0) )
|
||||
> + dx5 * tx1 * ( u(5,i-1,j,k)
|
||||
> - 2.0d+00 * u(5,i,j,k)
|
||||
> + u(5,i+1,j,k) )
|
||||
|
||||
|
||||
if (i .eq. 2)then
|
||||
do m = 1, 5
|
||||
rsd(m,2,j,k) = rsd(m,2,j,k)
|
||||
> - dssp * ( + 5.0d+00 * u(m,2,j,k)
|
||||
> - 4.0d+00 * u(m,3,j,k)
|
||||
> + u(m,4,j,k) )
|
||||
enddo
|
||||
else if (i .eq. 3)then
|
||||
do m = 1, 5
|
||||
rsd(m,3,j,k) = rsd(m,3,j,k)
|
||||
> - dssp * ( - 4.0d+00 * u(m,2,j,k)
|
||||
> + 6.0d+00 * u(m,3,j,k)
|
||||
> - 4.0d+00 * u(m,4,j,k)
|
||||
> + u(m,5,j,k) )
|
||||
enddo
|
||||
else if (i .eq. nx-2)then
|
||||
do m = 1, 5
|
||||
rsd(m,nx-2,j,k) = rsd(m,nx-2,j,k)
|
||||
> - dssp * ( u(m,nx-4,j,k)
|
||||
> - 4.0d+00 * u(m,nx-3,j,k)
|
||||
> + 6.0d+00 * u(m,nx-2,j,k)
|
||||
> - 4.0d+00 * u(m,nx-1,j,k) )
|
||||
enddo
|
||||
else if (i .eq. nx-1)then
|
||||
do m = 1, 5
|
||||
rsd(m,nx-1,j,k) = rsd(m,nx-1,j,k)
|
||||
> - dssp * ( u(m,nx-3,j,k)
|
||||
> - 4.0d+00 * u(m,nx-2,j,k)
|
||||
> + 5.0d+00 * u(m,nx-1,j,k) )
|
||||
enddo
|
||||
else
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k)
|
||||
> - dssp * ( u(m,i-2,j,k)
|
||||
> - 4.0d+00 * u(m,i-1,j,k)
|
||||
> + 6.0d+00 * u(m,i,j,k)
|
||||
> - 4.0d+00 * u(m,i+1,j,k)
|
||||
> + u(m,i+2,j,k) )
|
||||
end do
|
||||
endif
|
||||
! end do
|
||||
! end do
|
||||
! end do
|
||||
! if (timeron) call timer_stop(t_rhsx)
|
||||
|
||||
! if (timeron) call timer_start(t_rhsy)
|
||||
! do k = 2, nz - 1
|
||||
! do j = jst, jend
|
||||
! do i = ist, iend
|
||||
do p = -1, 1, 2
|
||||
flu(1,p) = u(3,i,j+p,k)
|
||||
u31 = u(3,i,j+p,k) / u(1,i,j+p,k)
|
||||
|
||||
q = qs(i,j+p,k)
|
||||
|
||||
flu(2,p) = u(2,i,j+p,k) * u31
|
||||
flu(3,p) = u(3,i,j+p,k) * u31 + c2 * (u(5,i,j+p,k)-q)
|
||||
flu(4,p) = u(4,i,j+p,k) * u31
|
||||
flu(5,p) = ( c1 * u(5,i,j+p,k) - c2 * q ) * u31
|
||||
end do
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k)
|
||||
> - ty2 * ( flu(m,1) - flu(m,-1) )
|
||||
end do
|
||||
|
||||
do p = 0, 1
|
||||
tmp = 1.0d+00/ u(1,i,j+p,k)
|
||||
|
||||
u21j = tmp * u(2,i,j+p,k)
|
||||
u31j = tmp * u(3,i,j+p,k)
|
||||
u41j = tmp * u(4,i,j+p,k)
|
||||
u51j = tmp * u(5,i,j+p,k)
|
||||
|
||||
tmp = 1.0d+00/ u(1,i,j-1+p,k)
|
||||
u21jm1 = tmp * u(2,i,j-1+p,k)
|
||||
u31jm1 = tmp * u(3,i,j-1+p,k)
|
||||
u41jm1 = tmp * u(4,i,j-1+p,k)
|
||||
u51jm1 = tmp * u(5,i,j-1+p,k)
|
||||
|
||||
flu(2,p) = ty3 * ( u21j - u21jm1 )
|
||||
flu(3,p) = (4.0d+00/3.0d+00) * ty3 * (u31j-u31jm1)
|
||||
flu(4,p) = ty3 * ( u41j - u41jm1 )
|
||||
flu(5,p) = 0.50d+00 * ( 1.0d+00 - c1*c5 )
|
||||
> * ty3 * ( ( u21j **2 + u31j **2 + u41j **2 )
|
||||
> - ( u21jm1**2 + u31jm1**2 + u41jm1**2 ) )
|
||||
> + (1.0d+00/6.0d+00)
|
||||
> * ty3 * ( u31j**2 - u31jm1**2 )
|
||||
> + c1 * c5 * ty3 * ( u51j - u51jm1 )
|
||||
enddo
|
||||
|
||||
rsd(1,i,j,k) = rsd(1,i,j,k)
|
||||
> + dy1 * ty1 * ( u(1,i,j-1,k)
|
||||
> - 2.0d+00 * u(1,i,j,k)
|
||||
> + u(1,i,j+1,k) )
|
||||
|
||||
rsd(2,i,j,k) = rsd(2,i,j,k)
|
||||
> + ty3 * c3 * c4 * ( flu(2,1) - flu(2,0) )
|
||||
> + dy2 * ty1 * ( u(2,i,j-1,k)
|
||||
> - 2.0d+00 * u(2,i,j,k)
|
||||
> + u(2,i,j+1,k) )
|
||||
|
||||
rsd(3,i,j,k) = rsd(3,i,j,k)
|
||||
> + ty3 * c3 * c4 * ( flu(3,1) - flu(3,0) )
|
||||
> + dy3 * ty1 * ( u(3,i,j-1,k)
|
||||
> - 2.0d+00 * u(3,i,j,k)
|
||||
> + u(3,i,j+1,k) )
|
||||
|
||||
rsd(4,i,j,k) = rsd(4,i,j,k)
|
||||
> + ty3 * c3 * c4 * ( flu(4,1) - flu(4,0) )
|
||||
> + dy4 * ty1 * ( u(4,i,j-1,k)
|
||||
> - 2.0d+00 * u(4,i,j,k)
|
||||
> + u(4,i,j+1,k) )
|
||||
|
||||
rsd(5,i,j,k) = rsd(5,i,j,k)
|
||||
> + ty3 * c3 * c4 * ( flu(5,1) - flu(5,0) )
|
||||
> + dy5 * ty1 * ( u(5,i,j-1,k)
|
||||
> - 2.0d+00 * u(5,i,j,k)
|
||||
> + u(5,i,j+1,k) )
|
||||
|
||||
|
||||
|
||||
if (j .eq. 2) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,2,k) = rsd(m,i,2,k)
|
||||
> - dssp * ( + 5.0d+00 * u(m,i,2,k)
|
||||
> - 4.0d+00 * u(m,i,3,k)
|
||||
> + u(m,i,4,k) )
|
||||
enddo
|
||||
elseif (j .eq. 3) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,3,k) = rsd(m,i,3,k)
|
||||
> - dssp * ( - 4.0d+00 * u(m,i,2,k)
|
||||
> + 6.0d+00 * u(m,i,3,k)
|
||||
> - 4.0d+00 * u(m,i,4,k)
|
||||
> + u(m,i,5,k) )
|
||||
end do
|
||||
elseif (j .eq. ny-2) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,ny-2,k) = rsd(m,i,ny-2,k)
|
||||
> - dssp * ( u(m,i,ny-4,k)
|
||||
> - 4.0d+00 * u(m,i,ny-3,k)
|
||||
> + 6.0d+00 * u(m,i,ny-2,k)
|
||||
> - 4.0d+00 * u(m,i,ny-1,k) )
|
||||
enddo
|
||||
elseif (j .eq. ny-1) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,ny-1,k) = rsd(m,i,ny-1,k)
|
||||
> - dssp * ( u(m,i,ny-3,k)
|
||||
> - 4.0d+00 * u(m,i,ny-2,k)
|
||||
> + 5.0d+00 * u(m,i,ny-1,k) )
|
||||
end do
|
||||
else
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k)
|
||||
> - dssp * ( u(m,i,j-2,k)
|
||||
> - 4.0d+00 * u(m,i,j-1,k)
|
||||
> + 6.0d+00 * u(m,i,j,k)
|
||||
> - 4.0d+00 * u(m,i,j+1,k)
|
||||
> + u(m,i,j+2,k) )
|
||||
end do
|
||||
endif
|
||||
|
||||
! end do
|
||||
! end do
|
||||
! end do
|
||||
|
||||
! if (timeron) call timer_stop(t_rhsy)
|
||||
|
||||
! if (timeron) call timer_start(t_rhsz)
|
||||
! do k = 2, nz - 1
|
||||
! do j = jst, jend
|
||||
! do i = ist, iend
|
||||
do p=-1,1,2
|
||||
flu(1,p) = u(4,i,j,k+p)
|
||||
u41 = u(4,i,j,k+p) / u(1,i,j,k+p)
|
||||
|
||||
q = qs(i,j,k+p)
|
||||
|
||||
flu(2,p) = u(2,i,j,k+p) * u41
|
||||
flu(3,p) = u(3,i,j,k+p) * u41
|
||||
flu(4,p) = u(4,i,j,k+p) * u41 + c2 * (u(5,i,j,k+p)-q)
|
||||
flu(5,p) = ( c1 * u(5,i,j,k+p) - c2 * q ) * u41
|
||||
enddo
|
||||
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k)
|
||||
> - tz2 * ( flu(m,1) - flu(m,-1) )
|
||||
end do
|
||||
|
||||
do p=0,1
|
||||
tmp = 1.0d+00/ u(1,i,j,k+p)
|
||||
|
||||
u21k = tmp * u(2,i,j,k+p)
|
||||
u31k = tmp * u(3,i,j,k+p)
|
||||
u41k = tmp * u(4,i,j,k+p)
|
||||
u51k = tmp * u(5,i,j,k+p)
|
||||
|
||||
tmp = 1.0d+00/ u(1,i,j,k-1+p)
|
||||
|
||||
u21km1 = tmp * u(2,i,j,k-1+p)
|
||||
u31km1 = tmp * u(3,i,j,k-1+p)
|
||||
u41km1 = tmp * u(4,i,j,k-1+p)
|
||||
u51km1 = tmp * u(5,i,j,k-1+p)
|
||||
|
||||
flu(2,p) = tz3 * ( u21k - u21km1 )
|
||||
flu(3,p) = tz3 * ( u31k - u31km1 )
|
||||
flu(4,p) = (4.0d+00/3.0d+00) * tz3 * (u41k-u41km1)
|
||||
flu(5,p) = 0.50d+00 * ( 1.0d+00 - c1*c5 )
|
||||
> * tz3 * ( ( u21k **2 + u31k **2 + u41k **2 )
|
||||
> - ( u21km1**2 + u31km1**2 + u41km1**2 ) )
|
||||
> + (1.0d+00/6.0d+00)
|
||||
> * tz3 * ( u41k**2 - u41km1**2 )
|
||||
> + c1 * c5 * tz3 * ( u51k - u51km1 )
|
||||
enddo
|
||||
rsd(1,i,j,k) = rsd(1,i,j,k)
|
||||
> + dz1 * tz1 * ( u(1,i,j,k-1)
|
||||
> - 2.0d+00 * u(1,i,j,k)
|
||||
> + u(1,i,j,k+1) )
|
||||
rsd(2,i,j,k) = rsd(2,i,j,k)
|
||||
> + tz3 * c3 * c4 * ( flu(2,1) - flu(2,0) )
|
||||
> + dz2 * tz1 * ( u(2,i,j,k-1)
|
||||
> - 2.0d+00 * u(2,i,j,k)
|
||||
> + u(2,i,j,k+1) )
|
||||
rsd(3,i,j,k) = rsd(3,i,j,k)
|
||||
> + tz3 * c3 * c4 * ( flu(3,1) - flu(3,0) )
|
||||
> + dz3 * tz1 * ( u(3,i,j,k-1)
|
||||
> - 2.0d+00 * u(3,i,j,k)
|
||||
> + u(3,i,j,k+1) )
|
||||
rsd(4,i,j,k) = rsd(4,i,j,k)
|
||||
> + tz3 * c3 * c4 * ( flu(4,1) - flu(4,0) )
|
||||
> + dz4 * tz1 * ( u(4,i,j,k-1)
|
||||
> - 2.0d+00 * u(4,i,j,k)
|
||||
> + u(4,i,j,k+1) )
|
||||
rsd(5,i,j,k) = rsd(5,i,j,k)
|
||||
> + tz3 * c3 * c4 * ( flu(5,1) - flu(5,0) )
|
||||
> + dz5 * tz1 * ( u(5,i,j,k-1)
|
||||
> - 2.0d+00 * u(5,i,j,k)
|
||||
> + u(5,i,j,k+1) )
|
||||
|
||||
|
||||
if (k .eq. 2) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,2) = rsd(m,i,j,2)
|
||||
> - dssp * ( + 5.0d+00 * u(m,i,j,2)
|
||||
> - 4.0d+00 * u(m,i,j,3)
|
||||
> + u(m,i,j,4) )
|
||||
end do
|
||||
elseif (k .eq. 3) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,3) = rsd(m,i,j,3)
|
||||
> - dssp * ( - 4.0d+00 * u(m,i,j,2)
|
||||
> + 6.0d+00 * u(m,i,j,3)
|
||||
> - 4.0d+00 * u(m,i,j,4)
|
||||
> + u(m,i,j,5) )
|
||||
end do
|
||||
elseif (k .eq. nz-2) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,nz-2) = rsd(m,i,j,nz-2)
|
||||
> - dssp * ( u(m,i,j,nz-4)
|
||||
> - 4.0d+00 * u(m,i,j,nz-3)
|
||||
> + 6.0d+00 * u(m,i,j,nz-2)
|
||||
> - 4.0d+00 * u(m,i,j,nz-1) )
|
||||
end do
|
||||
elseif (k .eq. nz-1) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,nz-1) = rsd(m,i,j,nz-1)
|
||||
> - dssp * ( u(m,i,j,nz-3)
|
||||
> - 4.0d+00 * u(m,i,j,nz-2)
|
||||
> + 5.0d+00 * u(m,i,j,nz-1) )
|
||||
end do
|
||||
else
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k)
|
||||
> - dssp * ( u(m,i,j,k-2)
|
||||
> - 4.0d+00 * u(m,i,j,k-1)
|
||||
> + 6.0d+00 * u(m,i,j,k)
|
||||
> - 4.0d+00 * u(m,i,j,k+1)
|
||||
> + u(m,i,j,k+2) )
|
||||
end do
|
||||
endif
|
||||
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
!DVM$ end region
|
||||
! if (timeron) call timer_stop(t_rhsz)
|
||||
if (timeron) call timer_stop(t_rhs)
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,536 @@
|
||||
|
||||
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine rhs ()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! compute the right hand sides
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! local variables
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
include 'applu.incl'
|
||||
!---------------------------------------------------------------------
|
||||
integer i,j,k,m
|
||||
|
||||
!DVM$ ALIGN flux_br3(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX3,iEX
|
||||
!DVM$&4,iEX2)
|
||||
!DVM$ DYNAMIC flux_br3
|
||||
double precision ,allocatable:: flux_br3(:,:,:,:)
|
||||
!DVM$ SHADOW flux_br3( 0:0,1:1,0:0,0:0 )
|
||||
|
||||
!DVM$ ALIGN flux_br2(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX3,iEX
|
||||
!DVM$&2,iEX4)
|
||||
!DVM$ DYNAMIC flux_br2
|
||||
double precision ,allocatable:: flux_br2(:,:,:,:)
|
||||
!DVM$ SHADOW flux_br2( 0:0,1:1,0:0,0:0 )
|
||||
|
||||
!DVM$ ALIGN flux_br1(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX2
|
||||
!DVM$&,iEX3,iEX4)
|
||||
!DVM$ DYNAMIC flux_br1
|
||||
double precision ,allocatable:: flux_br1(:,:,:,:)
|
||||
!DVM$ SHADOW flux_br1( 0:0,1:1,0:0,0:0 )
|
||||
double precision q
|
||||
double precision tmp,utmp(6,isiz3),rtmp(5,isiz3)
|
||||
|
||||
!DVM$ ALIGN rtmp_br1(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX3,iEX
|
||||
!DVM$&4,iEX2)
|
||||
!DVM$ DYNAMIC rtmp_br1
|
||||
double precision ,allocatable:: rtmp_br1(:,:,:,:)
|
||||
|
||||
!DVM$ ALIGN utmp_br1(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX3,iEX
|
||||
!DVM$&4,iEX2)
|
||||
!DVM$ DYNAMIC utmp_br1
|
||||
double precision ,allocatable:: utmp_br1(:,:,:,:)
|
||||
!DVM$ SHADOW utmp_br1( 0:0,2:2,0:0,0:0 )
|
||||
double precision u21,u31,u41
|
||||
double precision u21i,u31i,u41i,u51i
|
||||
double precision u21j,u31j,u41j,u51j
|
||||
double precision u21k,u31k,u41k,u51k
|
||||
double precision u21im1,u31im1,u41im1,u51im1
|
||||
double precision u21jm1,u31jm1,u41jm1,u51jm1
|
||||
double precision u21km1,u31km1,u41km1,u51km1
|
||||
|
||||
!DVM$ interval 11
|
||||
if (timeron) call timer_start(t_rhs)
|
||||
!DVM$ region
|
||||
!DVM$ PARALLEL (k,j,i) ON qs(i,j,k), PRIVATE (j,i,tmp,k,m)
|
||||
!DVM$&,SHADOW_COMPUTE,SHADOW_RENEW (u)
|
||||
do k = 1,nz
|
||||
do j = 1,ny
|
||||
do i = 1,nx
|
||||
tmp = 1.0d+00 / u(1,i,j,k)
|
||||
qs(i,j,k) = 0.50d+00 * (u(2,i,j,k) * u(2,i,j,k) + u(3,i,j
|
||||
&,k) * u(3,i,j,k) + u(4,i,j,k) * u(4,i,j,k)) * tmp
|
||||
tmp = 1.0d+00 / u(1,i,j,k)
|
||||
rho_i(i,j,k) = tmp
|
||||
do m = 1,5
|
||||
rsd(m,i,j,k) = (-(frct(m,i,j,k)))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ end region
|
||||
allocate(flux_br1(5,isiz1,2:isiz2 - 1,2:isiz3 - 1))
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! xi-direction flux differences
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ region
|
||||
!DVM$ PARALLEL (k,j,i) ON flux_br1(*,i,j,k), PRIVATE (j,i,k,q,u21),
|
||||
!DVM$&SHADOW_COMPUTE
|
||||
do k = 2,nz - 1
|
||||
do j = jst,jend
|
||||
do i = 1,nx
|
||||
flux_br1(1,i,j,k) = u(2,i,j,k)
|
||||
u21 = u(2,i,j,k) * rho_i(i,j,k)
|
||||
q = qs(i,j,k)
|
||||
flux_br1(2,i,j,k) = u(2,i,j,k) * u21 + c2 * (u(5,i,j,k) -
|
||||
& q)
|
||||
flux_br1(3,i,j,k) = u(3,i,j,k) * u21
|
||||
flux_br1(4,i,j,k) = u(4,i,j,k) * u21
|
||||
flux_br1(5,i,j,k) = (c1 * u(5,i,j,k) - c2 * q) * u21
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,j,i,m) ON rsd(m,i,j,k), PRIVATE (j,i,k,m)
|
||||
do k = 2,nz - 1
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
do m = 1,5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k) - tx2 * (flux_br1(m,i + 1,
|
||||
&j,k) - flux_br1(m,i - 1,j,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,j,i) ON flux_br1(*,i,j,k), PRIVATE (j,i,tmp,k,u21i,u31
|
||||
!DVM$&i,u41im1,u51im1,u51i,u41i,u21im1,u31im1),SHADOW_COMPUTE
|
||||
do k = 2,nz - 1
|
||||
do j = jst,jend
|
||||
do i = ist,nx
|
||||
tmp = rho_i(i,j,k)
|
||||
u21i = tmp * u(2,i,j,k)
|
||||
u31i = tmp * u(3,i,j,k)
|
||||
u41i = tmp * u(4,i,j,k)
|
||||
u51i = tmp * u(5,i,j,k)
|
||||
tmp = rho_i(i - 1,j,k)
|
||||
u21im1 = tmp * u(2,i - 1,j,k)
|
||||
u31im1 = tmp * u(3,i - 1,j,k)
|
||||
u41im1 = tmp * u(4,i - 1,j,k)
|
||||
u51im1 = tmp * u(5,i - 1,j,k)
|
||||
flux_br1(2,i,j,k) = 4.0d+00 / 3.0d+00 * tx3 * (u21i - u21
|
||||
&im1)
|
||||
flux_br1(3,i,j,k) = tx3 * (u31i - u31im1)
|
||||
flux_br1(4,i,j,k) = tx3 * (u41i - u41im1)
|
||||
flux_br1(5,i,j,k) = 0.50d+00 * (1.0d+00 - c1 * c5) * tx3
|
||||
&* (u21i** 2 + u31i** 2 + u41i** 2 - (u21im1** 2 + u31im1** 2 + u41
|
||||
&im1** 2)) + 1.0d+00 / 6.0d+00 * tx3 * (u21i** 2 - u21im1** 2) + c1
|
||||
& * c5 * tx3 * (u51i - u51im1)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,j,i) ON rsd(*,i,j,k), PRIVATE (j,i,k)
|
||||
do k = 2,nz - 1
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
rsd(1,i,j,k) = rsd(1,i,j,k) + dx1 * tx1 * (u(1,i - 1,j,k)
|
||||
& - 2.0d+00 * u(1,i,j,k) + u(1,i + 1,j,k))
|
||||
rsd(2,i,j,k) = rsd(2,i,j,k) + tx3 * c3 * c4 * (flux_br1(2
|
||||
&,i + 1,j,k) - flux_br1(2,i,j,k)) + dx2 * tx1 * (u(2,i - 1,j,k) - 2
|
||||
&.0d+00 * u(2,i,j,k) + u(2,i + 1,j,k))
|
||||
rsd(3,i,j,k) = rsd(3,i,j,k) + tx3 * c3 * c4 * (flux_br1(3
|
||||
&,i + 1,j,k) - flux_br1(3,i,j,k)) + dx3 * tx1 * (u(3,i - 1,j,k) - 2
|
||||
&.0d+00 * u(3,i,j,k) + u(3,i + 1,j,k))
|
||||
rsd(4,i,j,k) = rsd(4,i,j,k) + tx3 * c3 * c4 * (flux_br1(4
|
||||
&,i + 1,j,k) - flux_br1(4,i,j,k)) + dx4 * tx1 * (u(4,i - 1,j,k) - 2
|
||||
&.0d+00 * u(4,i,j,k) + u(4,i + 1,j,k))
|
||||
rsd(5,i,j,k) = rsd(5,i,j,k) + tx3 * c3 * c4 * (flux_br1(5
|
||||
&,i + 1,j,k) - flux_br1(5,i,j,k)) + dx5 * tx1 * (u(5,i - 1,j,k) - 2
|
||||
&.0d+00 * u(5,i,j,k) + u(5,i + 1,j,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Fourth-order dissipation
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ PARALLEL (k,j,m) ON rsd(m,2,j,k), PRIVATE (j,k,m)
|
||||
do k = 2,nz - 1
|
||||
do j = jst,jend
|
||||
do m = 1,5
|
||||
rsd(m,2,j,k) = rsd(m,2,j,k) - dssp * ((+(5.0d+00)) * u(m,
|
||||
&2,j,k) - 4.0d+00 * u(m,3,j,k) + u(m,4,j,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,j,m) ON rsd(m,3,j,k), PRIVATE (j,k,m)
|
||||
do k = 2,nz - 1
|
||||
do j = jst,jend
|
||||
do m = 1,5
|
||||
rsd(m,3,j,k) = rsd(m,3,j,k) - dssp * ((-(4.0d+00)) * u(m,
|
||||
&2,j,k) + 6.0d+00 * u(m,3,j,k) - 4.0d+00 * u(m,4,j,k) + u(m,5,j,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,j,i,m) ON rsd(m,i,j,k), PRIVATE (j,i,k,m)
|
||||
do k = 2,nz - 1
|
||||
do j = jst,jend
|
||||
do i = 4,nx - 3
|
||||
do m = 1,5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k) - dssp * (u(m,i - 2,j,k) -
|
||||
& 4.0d+00 * u(m,i - 1,j,k) + 6.0d+00 * u(m,i,j,k) - 4.0d+00 * u(m,i
|
||||
& + 1,j,k) + u(m,i + 2,j,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,j,m) ON rsd(m,nx + -2,j,k), PRIVATE (j,k,m)
|
||||
do k = 2,nz - 1
|
||||
do j = jst,jend
|
||||
do m = 1,5
|
||||
rsd(m,nx - 2,j,k) = rsd(m,nx - 2,j,k) - dssp * (u(m,nx -
|
||||
&4,j,k) - 4.0d+00 * u(m,nx - 3,j,k) + 6.0d+00 * u(m,nx - 2,j,k) - 4
|
||||
&.0d+00 * u(m,nx - 1,j,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,j,m) ON rsd(m,nx + -1,j,k), PRIVATE (j,k,m)
|
||||
do k = 2,nz - 1
|
||||
do j = jst,jend
|
||||
do m = 1,5
|
||||
rsd(m,nx - 1,j,k) = rsd(m,nx - 1,j,k) - dssp * (u(m,nx -
|
||||
&3,j,k) - 4.0d+00 * u(m,nx - 2,j,k) + 5.0d+00 * u(m,nx - 1,j,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ end region
|
||||
deallocate(flux_br1)
|
||||
allocate(flux_br2(5,isiz1,2:isiz1 - 1,2:isiz3 - 1))
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! eta-direction flux differences
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ region
|
||||
!DVM$ PARALLEL (k,i,j) ON flux_br2(*,j,i,k), PRIVATE (j,i,k,q,u31),
|
||||
!DVM$& SHADOW_COMPUTE
|
||||
do k = 2,nz - 1
|
||||
do i = ist,iend
|
||||
do j = 1,ny
|
||||
flux_br2(1,j,i,k) = u(3,i,j,k)
|
||||
u31 = u(3,i,j,k) * rho_i(i,j,k)
|
||||
q = qs(i,j,k)
|
||||
flux_br2(2,j,i,k) = u(2,i,j,k) * u31
|
||||
flux_br2(3,j,i,k) = u(3,i,j,k) * u31 + c2 * (u(5,i,j,k) -
|
||||
& q)
|
||||
flux_br2(4,j,i,k) = u(4,i,j,k) * u31
|
||||
flux_br2(5,j,i,k) = (c1 * u(5,i,j,k) - c2 * q) * u31
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,i,j,m) ON rsd(m,i,j,k), PRIVATE (j,i,k,m)
|
||||
do k = 2,nz - 1
|
||||
do i = ist,iend
|
||||
do j = jst,jend
|
||||
do m = 1,5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k) - ty2 * (flux_br2(m,j + 1,
|
||||
&i,k) - flux_br2(m,j - 1,i,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,i,j) ON flux_br2(*,j,i,k), PRIVATE (u51j,j,u31j,u41j,u
|
||||
!DVM$&51jm1,u21j,i,tmp,u41jm1,u21jm1,u31jm1,k),SHADOW_COMPUTE
|
||||
do k = 2,nz - 1
|
||||
do i = ist,iend
|
||||
do j = jst,ny
|
||||
tmp = rho_i(i,j,k)
|
||||
u21j = tmp * u(2,i,j,k)
|
||||
u31j = tmp * u(3,i,j,k)
|
||||
u41j = tmp * u(4,i,j,k)
|
||||
u51j = tmp * u(5,i,j,k)
|
||||
tmp = rho_i(i,j - 1,k)
|
||||
u21jm1 = tmp * u(2,i,j - 1,k)
|
||||
u31jm1 = tmp * u(3,i,j - 1,k)
|
||||
u41jm1 = tmp * u(4,i,j - 1,k)
|
||||
u51jm1 = tmp * u(5,i,j - 1,k)
|
||||
flux_br2(2,j,i,k) = ty3 * (u21j - u21jm1)
|
||||
flux_br2(3,j,i,k) = 4.0d+00 / 3.0d+00 * ty3 * (u31j - u31
|
||||
&jm1)
|
||||
flux_br2(4,j,i,k) = ty3 * (u41j - u41jm1)
|
||||
flux_br2(5,j,i,k) = 0.50d+00 * (1.0d+00 - c1 * c5) * ty3
|
||||
&* (u21j** 2 + u31j** 2 + u41j** 2 - (u21jm1** 2 + u31jm1** 2 + u41
|
||||
&jm1** 2)) + 1.0d+00 / 6.0d+00 * ty3 * (u31j** 2 - u31jm1** 2) + c1
|
||||
& * c5 * ty3 * (u51j - u51jm1)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,i,j) ON rsd(*,i,j,k), PRIVATE (j,i,k)
|
||||
do k = 2,nz - 1
|
||||
do i = ist,iend
|
||||
do j = jst,jend
|
||||
rsd(1,i,j,k) = rsd(1,i,j,k) + dy1 * ty1 * (u(1,i,j - 1,k)
|
||||
& - 2.0d+00 * u(1,i,j,k) + u(1,i,j + 1,k))
|
||||
rsd(2,i,j,k) = rsd(2,i,j,k) + ty3 * c3 * c4 * (flux_br2(2
|
||||
&,j + 1,i,k) - flux_br2(2,j,i,k)) + dy2 * ty1 * (u(2,i,j - 1,k) - 2
|
||||
&.0d+00 * u(2,i,j,k) + u(2,i,j + 1,k))
|
||||
rsd(3,i,j,k) = rsd(3,i,j,k) + ty3 * c3 * c4 * (flux_br2(3
|
||||
&,j + 1,i,k) - flux_br2(3,j,i,k)) + dy3 * ty1 * (u(3,i,j - 1,k) - 2
|
||||
&.0d+00 * u(3,i,j,k) + u(3,i,j + 1,k))
|
||||
rsd(4,i,j,k) = rsd(4,i,j,k) + ty3 * c3 * c4 * (flux_br2(4
|
||||
&,j + 1,i,k) - flux_br2(4,j,i,k)) + dy4 * ty1 * (u(4,i,j - 1,k) - 2
|
||||
&.0d+00 * u(4,i,j,k) + u(4,i,j + 1,k))
|
||||
rsd(5,i,j,k) = rsd(5,i,j,k) + ty3 * c3 * c4 * (flux_br2(5
|
||||
&,j + 1,i,k) - flux_br2(5,j,i,k)) + dy5 * ty1 * (u(5,i,j - 1,k) - 2
|
||||
&.0d+00 * u(5,i,j,k) + u(5,i,j + 1,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,i,m) ON rsd(m,i,2,k), PRIVATE (i,k,m)
|
||||
do k = 2,nz - 1
|
||||
do i = ist,iend
|
||||
do m = 1,5
|
||||
rsd(m,i,2,k) = rsd(m,i,2,k) - dssp * ((+(5.0d+00)) * u(m,
|
||||
&i,2,k) - 4.0d+00 * u(m,i,3,k) + u(m,i,4,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,i,m) ON rsd(m,i,3,k), PRIVATE (i,k,m)
|
||||
do k = 2,nz - 1
|
||||
do i = ist,iend
|
||||
do m = 1,5
|
||||
rsd(m,i,3,k) = rsd(m,i,3,k) - dssp * ((-(4.0d+00)) * u(m,
|
||||
&i,2,k) + 6.0d+00 * u(m,i,3,k) - 4.0d+00 * u(m,i,4,k) + u(m,i,5,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,i,j,m) ON rsd(m,i,j,k), PRIVATE (j,i,k,m)
|
||||
do k = 2,nz - 1
|
||||
do i = ist,iend
|
||||
do j = 4,ny - 3
|
||||
do m = 1,5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k) - dssp * (u(m,i,j - 2,k) -
|
||||
& 4.0d+00 * u(m,i,j - 1,k) + 6.0d+00 * u(m,i,j,k) - 4.0d+00 * u(m,i
|
||||
&,j + 1,k) + u(m,i,j + 2,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,i,m) ON rsd(m,i,ny + -2,k), PRIVATE (i,k,m)
|
||||
do k = 2,nz - 1
|
||||
do i = ist,iend
|
||||
do m = 1,5
|
||||
rsd(m,i,ny - 2,k) = rsd(m,i,ny - 2,k) - dssp * (u(m,i,ny
|
||||
&- 4,k) - 4.0d+00 * u(m,i,ny - 3,k) + 6.0d+00 * u(m,i,ny - 2,k) - 4
|
||||
&.0d+00 * u(m,i,ny - 1,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,i,m) ON rsd(m,i,ny + -1,k), PRIVATE (i,k,m)
|
||||
do k = 2,nz - 1
|
||||
do i = ist,iend
|
||||
do m = 1,5
|
||||
rsd(m,i,ny - 1,k) = rsd(m,i,ny - 1,k) - dssp * (u(m,i,ny
|
||||
&- 3,k) - 4.0d+00 * u(m,i,ny - 2,k) + 5.0d+00 * u(m,i,ny - 1,k))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ end region
|
||||
deallocate(flux_br2)
|
||||
allocate(utmp_br1(6,isiz3,2:isiz1 - 1,2:isiz2 - 1))
|
||||
allocate(rtmp_br1(5,isiz3,2:isiz1 - 1,2:isiz2 - 1))
|
||||
allocate(flux_br3(5,isiz1,2:isiz1 - 1,2:isiz2 - 1))
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! zeta-direction flux differences
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ region
|
||||
!DVM$ PARALLEL (j,i,k) ON utmp_br1(*,k,i,j), PRIVATE (j,i,k),
|
||||
!DVM$& SHADOW_COMPUTE
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
do k = 1,nz
|
||||
utmp_br1(1,k,i,j) = u(1,i,j,k)
|
||||
utmp_br1(2,k,i,j) = u(2,i,j,k)
|
||||
utmp_br1(3,k,i,j) = u(3,i,j,k)
|
||||
utmp_br1(4,k,i,j) = u(4,i,j,k)
|
||||
utmp_br1(5,k,i,j) = u(5,i,j,k)
|
||||
utmp_br1(6,k,i,j) = rho_i(i,j,k)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (j,i,k) ON flux_br3(*,k,i,j), PRIVATE (j,i,k,q,u41),
|
||||
!DVM$& SHADOW_COMPUTE
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
do k = 1,nz
|
||||
flux_br3(1,k,i,j) = utmp_br1(4,k,i,j)
|
||||
u41 = utmp_br1(4,k,i,j) * utmp_br1(6,k,i,j)
|
||||
q = qs(i,j,k)
|
||||
flux_br3(2,k,i,j) = utmp_br1(2,k,i,j) * u41
|
||||
flux_br3(3,k,i,j) = utmp_br1(3,k,i,j) * u41
|
||||
flux_br3(4,k,i,j) = utmp_br1(4,k,i,j) * u41 + c2 * (utmp_
|
||||
&br1(5,k,i,j) - q)
|
||||
flux_br3(5,k,i,j) = (c1 * utmp_br1(5,k,i,j) - c2 * q) * u
|
||||
&41
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (j,i,k,m) ON rtmp_br1(m,k,i,j), PRIVATE (j,i,k,m)
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
do k = 2,nz - 1
|
||||
do m = 1,5
|
||||
rtmp_br1(m,k,i,j) = rsd(m,i,j,k) - tz2 * (flux_br3(m,k
|
||||
& + 1,i,j) - flux_br3(m,k - 1,i,j))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (j,i,k) ON flux_br3(*,k,i,j), PRIVATE (j,i,tmp,k,u21km1,u
|
||||
!DVM$&41km1,u51k,u51km1,u31km1,u31k,u41k,u21k),SHADOW_COMPUTE
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
do k = 2,nz
|
||||
tmp = utmp_br1(6,k,i,j)
|
||||
u21k = tmp * utmp_br1(2,k,i,j)
|
||||
u31k = tmp * utmp_br1(3,k,i,j)
|
||||
u41k = tmp * utmp_br1(4,k,i,j)
|
||||
u51k = tmp * utmp_br1(5,k,i,j)
|
||||
tmp = utmp_br1(6,k - 1,i,j)
|
||||
u21km1 = tmp * utmp_br1(2,k - 1,i,j)
|
||||
u31km1 = tmp * utmp_br1(3,k - 1,i,j)
|
||||
u41km1 = tmp * utmp_br1(4,k - 1,i,j)
|
||||
u51km1 = tmp * utmp_br1(5,k - 1,i,j)
|
||||
flux_br3(2,k,i,j) = tz3 * (u21k - u21km1)
|
||||
flux_br3(3,k,i,j) = tz3 * (u31k - u31km1)
|
||||
flux_br3(4,k,i,j) = 4.0d+00 / 3.0d+00 * tz3 * (u41k - u41
|
||||
&km1)
|
||||
flux_br3(5,k,i,j) = 0.50d+00 * (1.0d+00 - c1 * c5) * tz3
|
||||
&* (u21k** 2 + u31k** 2 + u41k** 2 - (u21km1** 2 + u31km1** 2 + u41
|
||||
&km1** 2)) + 1.0d+00 / 6.0d+00 * tz3 * (u41k** 2 - u41km1** 2) + c1
|
||||
& * c5 * tz3 * (u51k - u51km1)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (j,i,k) ON rtmp_br1(*,k,i,j), PRIVATE (j,i,k)
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
do k = 2,nz - 1
|
||||
rtmp_br1(1,k,i,j) = rtmp_br1(1,k,i,j) + dz1 * tz1 * (utmp
|
||||
&_br1(1,k - 1,i,j) - 2.0d+00 * utmp_br1(1,k,i,j) + utmp_br1(1,k + 1
|
||||
&,i,j))
|
||||
rtmp_br1(2,k,i,j) = rtmp_br1(2,k,i,j) + tz3 * c3 * c4 * (
|
||||
&flux_br3(2,k + 1,i,j) - flux_br3(2,k,i,j)) + dz2 * tz1 * (utmp_br1
|
||||
&(2,k - 1,i,j) - 2.0d+00 * utmp_br1(2,k,i,j) + utmp_br1(2,k + 1,i,j
|
||||
&))
|
||||
rtmp_br1(3,k,i,j) = rtmp_br1(3,k,i,j) + tz3 * c3 * c4 * (
|
||||
&flux_br3(3,k + 1,i,j) - flux_br3(3,k,i,j)) + dz3 * tz1 * (utmp_br1
|
||||
&(3,k - 1,i,j) - 2.0d+00 * utmp_br1(3,k,i,j) + utmp_br1(3,k + 1,i,j
|
||||
&))
|
||||
rtmp_br1(4,k,i,j) = rtmp_br1(4,k,i,j) + tz3 * c3 * c4 * (
|
||||
&flux_br3(4,k + 1,i,j) - flux_br3(4,k,i,j)) + dz4 * tz1 * (utmp_br1
|
||||
&(4,k - 1,i,j) - 2.0d+00 * utmp_br1(4,k,i,j) + utmp_br1(4,k + 1,i,j
|
||||
&))
|
||||
rtmp_br1(5,k,i,j) = rtmp_br1(5,k,i,j) + tz3 * c3 * c4 * (
|
||||
&flux_br3(5,k + 1,i,j) - flux_br3(5,k,i,j)) + dz5 * tz1 * (utmp_br1
|
||||
&(5,k - 1,i,j) - 2.0d+00 * utmp_br1(5,k,i,j) + utmp_br1(5,k + 1,i,j
|
||||
&))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! fourth-order dissipation
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ PARALLEL (j,i,m) ON rsd(m,i,j,2), PRIVATE (j,i,m)
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
do m = 1,5
|
||||
rsd(m,i,j,2) = rtmp_br1(m,2,i,j) - dssp * ((+(5.0d+00)) *
|
||||
& utmp_br1(m,2,i,j) - 4.0d+00 * utmp_br1(m,3,i,j) + utmp_br1(m,4,i,
|
||||
&j))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (j,i,m) ON rsd(m,i,j,3), PRIVATE (j,i,m)
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
do m = 1,5
|
||||
rsd(m,i,j,3) = rtmp_br1(m,3,i,j) - dssp * ((-(4.0d+00)) *
|
||||
& utmp_br1(m,2,i,j) + 6.0d+00 * utmp_br1(m,3,i,j) - 4.0d+00 * utmp_
|
||||
&br1(m,4,i,j) + utmp_br1(m,5,i,j))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (j,i,k,m) ON rsd(m,i,j,k), PRIVATE (j,i,k,m)
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
do k = 4,nz - 3
|
||||
do m = 1,5
|
||||
rsd(m,i,j,k) = rtmp_br1(m,k,i,j) - dssp * (utmp_br1(m,
|
||||
&k - 2,i,j) - 4.0d+00 * utmp_br1(m,k - 1,i,j) + 6.0d+00 * utmp_br1(
|
||||
&m,k,i,j) - 4.0d+00 * utmp_br1(m,k + 1,i,j) + utmp_br1(m,k + 2,i,j)
|
||||
&)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (j,i,m) ON rsd(m,i,j,nz + -2), PRIVATE (j,i,m)
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
do m = 1,5
|
||||
rsd(m,i,j,nz - 2) = rtmp_br1(m,nz - 2,i,j) - dssp * (utmp
|
||||
&_br1(m,nz - 4,i,j) - 4.0d+00 * utmp_br1(m,nz - 3,i,j) + 6.0d+00 *
|
||||
&utmp_br1(m,nz - 2,i,j) - 4.0d+00 * utmp_br1(m,nz - 1,i,j))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (j,i,m) ON rsd(m,i,j,nz + -1), PRIVATE (j,i,m)
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
do m = 1,5
|
||||
rsd(m,i,j,nz - 1) = rtmp_br1(m,nz - 1,i,j) - dssp * (utmp
|
||||
&_br1(m,nz - 3,i,j) - 4.0d+00 * utmp_br1(m,nz - 2,i,j) + 5.0d+00 *
|
||||
&utmp_br1(m,nz - 1,i,j))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ end region
|
||||
deallocate(flux_br3)
|
||||
deallocate(rtmp_br1)
|
||||
deallocate(utmp_br1)
|
||||
if (timeron) call timer_stop(t_rhs)
|
||||
!DVM$ end interval
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,415 @@
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine rhs
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c compute the right hand sides
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
|
||||
include 'applu.incl'
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c local variables
|
||||
c---------------------------------------------------------------------
|
||||
integer i, j, k, m, p
|
||||
double precision q
|
||||
double precision tmp, utmp(6,isiz3), rtmp(5,isiz3)
|
||||
double precision u21, u31, u41
|
||||
double precision u21i, u31i, u41i, u51i
|
||||
double precision u21j, u31j, u41j, u51j
|
||||
double precision u21k, u31k, u41k, u51k
|
||||
double precision u21im1, u31im1, u41im1, u51im1
|
||||
double precision u21jm1, u31jm1, u41jm1, u51jm1
|
||||
double precision u21km1, u31km1, u41km1, u51km1
|
||||
double precision flu(5,-1:1)
|
||||
|
||||
|
||||
if (timeron) call timer_start(t_rhs)
|
||||
!DVM$ PARALLEL (k,j,i) ON qs(i,j,k), PRIVATE (j,i,tmp,k,m)
|
||||
!DVM$&,SHADOW_COMPUTE,SHADOW_RENEW (u)
|
||||
do k = 1, nz
|
||||
do j = 1, ny
|
||||
do i = 1, nx
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,k) = - frct(m,i,j,k)
|
||||
end do
|
||||
tmp = 1.0d+00 / u(1,i,j,k)
|
||||
rho_i(i,j,k) = tmp
|
||||
qs(i,j,k) = 0.50d+00 * ( u(2,i,j,k) * u(2,i,j,k)
|
||||
> + u(3,i,j,k) * u(3,i,j,k)
|
||||
> + u(4,i,j,k) * u(4,i,j,k) )
|
||||
> * tmp
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
! if (timeron) call timer_start(t_rhsx)
|
||||
!DVM$ PARALLEL (k,j,i) on rsd(*,i,j,k)
|
||||
do k = 2, nz - 1
|
||||
do j = jst, jend
|
||||
do i = ist, iend
|
||||
do p = -1, 1, 2
|
||||
flu(1,p) = u(2,i+p,j,k)
|
||||
u21 = u(2,i+p,j,k) * rho_i(i+p,j,k)
|
||||
|
||||
q = qs(i+p,j,k)
|
||||
|
||||
flu(2,p) = u(2,i+p,j,k) * u21 + c2 *
|
||||
> ( u(5,i+p,j,k) - q )
|
||||
flu(3,p) = u(3,i+p,j,k) * u21
|
||||
flu(4,p) = u(4,i+p,j,k) * u21
|
||||
flu(5,p) = ( c1 * u(5,i+p,j,k) - c2 * q ) * u21
|
||||
end do
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k)
|
||||
> - tx2 * ( flu(m,1) - flu(m,-1) )
|
||||
end do
|
||||
|
||||
do p = 0, 1
|
||||
tmp = rho_i(i+p,j,k)
|
||||
|
||||
u21i = tmp * u(2,i+p,j,k)
|
||||
u31i = tmp * u(3,i+p,j,k)
|
||||
u41i = tmp * u(4,i+p,j,k)
|
||||
u51i = tmp * u(5,i+p,j,k)
|
||||
|
||||
tmp = rho_i(i-1+p,j,k)
|
||||
|
||||
u21im1 = tmp * u(2,i-1+p,j,k)
|
||||
u31im1 = tmp * u(3,i-1+p,j,k)
|
||||
u41im1 = tmp * u(4,i-1+p,j,k)
|
||||
u51im1 = tmp * u(5,i-1+p,j,k)
|
||||
|
||||
flu(2,p) = (4.0d+00/3.0d+00) * tx3 * (u21i-u21im1)
|
||||
flu(3,p) = tx3 * ( u31i - u31im1 )
|
||||
flu(4,p) = tx3 * ( u41i - u41im1 )
|
||||
flu(5,p) = 0.50d+00 * ( 1.0d+00 - c1*c5)
|
||||
> * tx3 * ( ( u21i **2 + u31i **2+u41i **2)
|
||||
> - ( u21im1**2 + u31im1**2+u41im1**2))
|
||||
> + (1.0d+00/6.0d+00)
|
||||
> * tx3 * ( u21i**2 - u21im1**2 )
|
||||
> + c1 * c5 * tx3 * ( u51i - u51im1 )
|
||||
enddo
|
||||
|
||||
rsd(1,i,j,k) = rsd(1,i,j,k)
|
||||
> + dx1 * tx1 * ( u(1,i-1,j,k)
|
||||
> - 2.0d+00 * u(1,i,j,k)
|
||||
> + u(1,i+1,j,k) )
|
||||
rsd(2,i,j,k) = rsd(2,i,j,k)
|
||||
> + tx3 * c3 * c4 * ( flu(2,1) - flu(2,0) )
|
||||
> + dx2 * tx1 * ( u(2,i-1,j,k)
|
||||
> - 2.0d+00 * u(2,i,j,k)
|
||||
> + u(2,i+1,j,k) )
|
||||
rsd(3,i,j,k) = rsd(3,i,j,k)
|
||||
> + tx3 * c3 * c4 * ( flu(3,1) - flu(3,0) )
|
||||
> + dx3 * tx1 * ( u(3,i-1,j,k)
|
||||
> - 2.0d+00 * u(3,i,j,k)
|
||||
> + u(3,i+1,j,k) )
|
||||
rsd(4,i,j,k) = rsd(4,i,j,k)
|
||||
> + tx3 * c3 * c4 * ( flu(4,1) - flu(4,0) )
|
||||
> + dx4 * tx1 * ( u(4,i-1,j,k)
|
||||
> - 2.0d+00 * u(4,i,j,k)
|
||||
> + u(4,i+1,j,k) )
|
||||
rsd(5,i,j,k) = rsd(5,i,j,k)
|
||||
> + tx3 * c3 * c4 * ( flu(5,1) - flu(5,0) )
|
||||
> + dx5 * tx1 * ( u(5,i-1,j,k)
|
||||
> - 2.0d+00 * u(5,i,j,k)
|
||||
> + u(5,i+1,j,k) )
|
||||
|
||||
|
||||
if (i .eq. 2)then
|
||||
do m = 1, 5
|
||||
rsd(m,2,j,k) = rsd(m,2,j,k)
|
||||
> - dssp * ( + 5.0d+00 * u(m,2,j,k)
|
||||
> - 4.0d+00 * u(m,3,j,k)
|
||||
> + u(m,4,j,k) )
|
||||
enddo
|
||||
else if (i .eq. 3)then
|
||||
do m = 1, 5
|
||||
rsd(m,3,j,k) = rsd(m,3,j,k)
|
||||
> - dssp * ( - 4.0d+00 * u(m,2,j,k)
|
||||
> + 6.0d+00 * u(m,3,j,k)
|
||||
> - 4.0d+00 * u(m,4,j,k)
|
||||
> + u(m,5,j,k) )
|
||||
enddo
|
||||
else if (i .eq. nx-2)then
|
||||
do m = 1, 5
|
||||
rsd(m,nx-2,j,k) = rsd(m,nx-2,j,k)
|
||||
> - dssp * ( u(m,nx-4,j,k)
|
||||
> - 4.0d+00 * u(m,nx-3,j,k)
|
||||
> + 6.0d+00 * u(m,nx-2,j,k)
|
||||
> - 4.0d+00 * u(m,nx-1,j,k) )
|
||||
enddo
|
||||
else if (i .eq. nx-1)then
|
||||
do m = 1, 5
|
||||
rsd(m,nx-1,j,k) = rsd(m,nx-1,j,k)
|
||||
> - dssp * ( u(m,nx-3,j,k)
|
||||
> - 4.0d+00 * u(m,nx-2,j,k)
|
||||
> + 5.0d+00 * u(m,nx-1,j,k) )
|
||||
enddo
|
||||
else
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k)
|
||||
> - dssp * ( u(m,i-2,j,k)
|
||||
> - 4.0d+00 * u(m,i-1,j,k)
|
||||
> + 6.0d+00 * u(m,i,j,k)
|
||||
> - 4.0d+00 * u(m,i+1,j,k)
|
||||
> + u(m,i+2,j,k) )
|
||||
end do
|
||||
endif
|
||||
! end do
|
||||
! end do
|
||||
! end do
|
||||
! if (timeron) call timer_stop(t_rhsx)
|
||||
|
||||
! if (timeron) call timer_start(t_rhsy)
|
||||
! do k = 2, nz - 1
|
||||
! do j = jst, jend
|
||||
! do i = ist, iend
|
||||
do p = -1, 1, 2
|
||||
flu(1,p) = u(3,i,j+p,k)
|
||||
u31 = u(3,i,j+p,k) * rho_i(i,j+p,k)
|
||||
|
||||
q = qs(i,j+p,k)
|
||||
|
||||
flu(2,p) = u(2,i,j+p,k) * u31
|
||||
flu(3,p) = u(3,i,j+p,k) * u31 + c2 * (u(5,i,j+p,k)-q)
|
||||
flu(4,p) = u(4,i,j+p,k) * u31
|
||||
flu(5,p) = ( c1 * u(5,i,j+p,k) - c2 * q ) * u31
|
||||
end do
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k)
|
||||
> - ty2 * ( flu(m,1) - flu(m,-1) )
|
||||
end do
|
||||
|
||||
do p = 0, 1
|
||||
tmp = rho_i(i,j+p,k)
|
||||
|
||||
u21j = tmp * u(2,i,j+p,k)
|
||||
u31j = tmp * u(3,i,j+p,k)
|
||||
u41j = tmp * u(4,i,j+p,k)
|
||||
u51j = tmp * u(5,i,j+p,k)
|
||||
|
||||
tmp = rho_i(i,j-1+p,k)
|
||||
u21jm1 = tmp * u(2,i,j-1+p,k)
|
||||
u31jm1 = tmp * u(3,i,j-1+p,k)
|
||||
u41jm1 = tmp * u(4,i,j-1+p,k)
|
||||
u51jm1 = tmp * u(5,i,j-1+p,k)
|
||||
|
||||
flu(2,p) = ty3 * ( u21j - u21jm1 )
|
||||
flu(3,p) = (4.0d+00/3.0d+00) * ty3 * (u31j-u31jm1)
|
||||
flu(4,p) = ty3 * ( u41j - u41jm1 )
|
||||
flu(5,p) = 0.50d+00 * ( 1.0d+00 - c1*c5 )
|
||||
> * ty3 * ( ( u21j **2 + u31j **2 + u41j **2 )
|
||||
> - ( u21jm1**2 + u31jm1**2 + u41jm1**2 ) )
|
||||
> + (1.0d+00/6.0d+00)
|
||||
> * ty3 * ( u31j**2 - u31jm1**2 )
|
||||
> + c1 * c5 * ty3 * ( u51j - u51jm1 )
|
||||
enddo
|
||||
|
||||
rsd(1,i,j,k) = rsd(1,i,j,k)
|
||||
> + dy1 * ty1 * ( u(1,i,j-1,k)
|
||||
> - 2.0d+00 * u(1,i,j,k)
|
||||
> + u(1,i,j+1,k) )
|
||||
|
||||
rsd(2,i,j,k) = rsd(2,i,j,k)
|
||||
> + ty3 * c3 * c4 * ( flu(2,1) - flu(2,0) )
|
||||
> + dy2 * ty1 * ( u(2,i,j-1,k)
|
||||
> - 2.0d+00 * u(2,i,j,k)
|
||||
> + u(2,i,j+1,k) )
|
||||
|
||||
rsd(3,i,j,k) = rsd(3,i,j,k)
|
||||
> + ty3 * c3 * c4 * ( flu(3,1) - flu(3,0) )
|
||||
> + dy3 * ty1 * ( u(3,i,j-1,k)
|
||||
> - 2.0d+00 * u(3,i,j,k)
|
||||
> + u(3,i,j+1,k) )
|
||||
|
||||
rsd(4,i,j,k) = rsd(4,i,j,k)
|
||||
> + ty3 * c3 * c4 * ( flu(4,1) - flu(4,0) )
|
||||
> + dy4 * ty1 * ( u(4,i,j-1,k)
|
||||
> - 2.0d+00 * u(4,i,j,k)
|
||||
> + u(4,i,j+1,k) )
|
||||
|
||||
rsd(5,i,j,k) = rsd(5,i,j,k)
|
||||
> + ty3 * c3 * c4 * ( flu(5,1) - flu(5,0) )
|
||||
> + dy5 * ty1 * ( u(5,i,j-1,k)
|
||||
> - 2.0d+00 * u(5,i,j,k)
|
||||
> + u(5,i,j+1,k) )
|
||||
|
||||
|
||||
|
||||
if (j .eq. 2) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,2,k) = rsd(m,i,2,k)
|
||||
> - dssp * ( + 5.0d+00 * u(m,i,2,k)
|
||||
> - 4.0d+00 * u(m,i,3,k)
|
||||
> + u(m,i,4,k) )
|
||||
enddo
|
||||
elseif (j .eq. 3) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,3,k) = rsd(m,i,3,k)
|
||||
> - dssp * ( - 4.0d+00 * u(m,i,2,k)
|
||||
> + 6.0d+00 * u(m,i,3,k)
|
||||
> - 4.0d+00 * u(m,i,4,k)
|
||||
> + u(m,i,5,k) )
|
||||
end do
|
||||
elseif (j .eq. ny-2) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,ny-2,k) = rsd(m,i,ny-2,k)
|
||||
> - dssp * ( u(m,i,ny-4,k)
|
||||
> - 4.0d+00 * u(m,i,ny-3,k)
|
||||
> + 6.0d+00 * u(m,i,ny-2,k)
|
||||
> - 4.0d+00 * u(m,i,ny-1,k) )
|
||||
enddo
|
||||
elseif (j .eq. ny-1) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,ny-1,k) = rsd(m,i,ny-1,k)
|
||||
> - dssp * ( u(m,i,ny-3,k)
|
||||
> - 4.0d+00 * u(m,i,ny-2,k)
|
||||
> + 5.0d+00 * u(m,i,ny-1,k) )
|
||||
end do
|
||||
else
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k)
|
||||
> - dssp * ( u(m,i,j-2,k)
|
||||
> - 4.0d+00 * u(m,i,j-1,k)
|
||||
> + 6.0d+00 * u(m,i,j,k)
|
||||
> - 4.0d+00 * u(m,i,j+1,k)
|
||||
> + u(m,i,j+2,k) )
|
||||
end do
|
||||
endif
|
||||
|
||||
! end do
|
||||
! end do
|
||||
! end do
|
||||
|
||||
! if (timeron) call timer_stop(t_rhsy)
|
||||
|
||||
! if (timeron) call timer_start(t_rhsz)
|
||||
! do k = 2, nz - 1
|
||||
! do j = jst, jend
|
||||
! do i = ist, iend
|
||||
do p=-1,1,2
|
||||
flu(1,p) = u(4,i,j,k+p)
|
||||
u41 = u(4,i,j,k+p) * rho_i(i,j,k+p)
|
||||
|
||||
q = qs(i,j,k+p)
|
||||
|
||||
flu(2,p) = u(2,i,j,k+p) * u41
|
||||
flu(3,p) = u(3,i,j,k+p) * u41
|
||||
flu(4,p) = u(4,i,j,k+p) * u41 + c2 * (u(5,i,j,k+p)-q)
|
||||
flu(5,p) = ( c1 * u(5,i,j,k+p) - c2 * q ) * u41
|
||||
enddo
|
||||
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k)
|
||||
> - tz2 * ( flu(m,1) - flu(m,-1) )
|
||||
end do
|
||||
|
||||
do p=0,1
|
||||
tmp = rho_i(i,j,k+p)
|
||||
|
||||
u21k = tmp * u(2,i,j,k+p)
|
||||
u31k = tmp * u(3,i,j,k+p)
|
||||
u41k = tmp * u(4,i,j,k+p)
|
||||
u51k = tmp * u(5,i,j,k+p)
|
||||
|
||||
tmp = rho_i(i,j,k-1+p)
|
||||
|
||||
u21km1 = tmp * u(2,i,j,k-1+p)
|
||||
u31km1 = tmp * u(3,i,j,k-1+p)
|
||||
u41km1 = tmp * u(4,i,j,k-1+p)
|
||||
u51km1 = tmp * u(5,i,j,k-1+p)
|
||||
|
||||
flu(2,p) = tz3 * ( u21k - u21km1 )
|
||||
flu(3,p) = tz3 * ( u31k - u31km1 )
|
||||
flu(4,p) = (4.0d+00/3.0d+00) * tz3 * (u41k-u41km1)
|
||||
flu(5,p) = 0.50d+00 * ( 1.0d+00 - c1*c5 )
|
||||
> * tz3 * ( ( u21k **2 + u31k **2 + u41k **2 )
|
||||
> - ( u21km1**2 + u31km1**2 + u41km1**2 ) )
|
||||
> + (1.0d+00/6.0d+00)
|
||||
> * tz3 * ( u41k**2 - u41km1**2 )
|
||||
> + c1 * c5 * tz3 * ( u51k - u51km1 )
|
||||
enddo
|
||||
rsd(1,i,j,k) = rsd(1,i,j,k)
|
||||
> + dz1 * tz1 * ( u(1,i,j,k-1)
|
||||
> - 2.0d+00 * u(1,i,j,k)
|
||||
> + u(1,i,j,k+1) )
|
||||
rsd(2,i,j,k) = rsd(2,i,j,k)
|
||||
> + tz3 * c3 * c4 * ( flu(2,1) - flu(2,0) )
|
||||
> + dz2 * tz1 * ( u(2,i,j,k-1)
|
||||
> - 2.0d+00 * u(2,i,j,k)
|
||||
> + u(2,i,j,k+1) )
|
||||
rsd(3,i,j,k) = rsd(3,i,j,k)
|
||||
> + tz3 * c3 * c4 * ( flu(3,1) - flu(3,0) )
|
||||
> + dz3 * tz1 * ( u(3,i,j,k-1)
|
||||
> - 2.0d+00 * u(3,i,j,k)
|
||||
> + u(3,i,j,k+1) )
|
||||
rsd(4,i,j,k) = rsd(4,i,j,k)
|
||||
> + tz3 * c3 * c4 * ( flu(4,1) - flu(4,0) )
|
||||
> + dz4 * tz1 * ( u(4,i,j,k-1)
|
||||
> - 2.0d+00 * u(4,i,j,k)
|
||||
> + u(4,i,j,k+1) )
|
||||
rsd(5,i,j,k) = rsd(5,i,j,k)
|
||||
> + tz3 * c3 * c4 * ( flu(5,1) - flu(5,0) )
|
||||
> + dz5 * tz1 * ( u(5,i,j,k-1)
|
||||
> - 2.0d+00 * u(5,i,j,k)
|
||||
> + u(5,i,j,k+1) )
|
||||
|
||||
|
||||
if (k .eq. 2) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,2) = rsd(m,i,j,2)
|
||||
> - dssp * ( + 5.0d+00 * u(m,i,j,2)
|
||||
> - 4.0d+00 * u(m,i,j,3)
|
||||
> + u(m,i,j,4) )
|
||||
end do
|
||||
elseif (k .eq. 3) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,3) = rsd(m,i,j,3)
|
||||
> - dssp * ( - 4.0d+00 * u(m,i,j,2)
|
||||
> + 6.0d+00 * u(m,i,j,3)
|
||||
> - 4.0d+00 * u(m,i,j,4)
|
||||
> + u(m,i,j,5) )
|
||||
end do
|
||||
elseif (k .eq. nz-2) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,nz-2) = rsd(m,i,j,nz-2)
|
||||
> - dssp * ( u(m,i,j,nz-4)
|
||||
> - 4.0d+00 * u(m,i,j,nz-3)
|
||||
> + 6.0d+00 * u(m,i,j,nz-2)
|
||||
> - 4.0d+00 * u(m,i,j,nz-1) )
|
||||
end do
|
||||
elseif (k .eq. nz-1) then
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,nz-1) = rsd(m,i,j,nz-1)
|
||||
> - dssp * ( u(m,i,j,nz-3)
|
||||
> - 4.0d+00 * u(m,i,j,nz-2)
|
||||
> + 5.0d+00 * u(m,i,j,nz-1) )
|
||||
end do
|
||||
else
|
||||
do m = 1, 5
|
||||
rsd(m,i,j,k) = rsd(m,i,j,k)
|
||||
> - dssp * ( u(m,i,j,k-2)
|
||||
> - 4.0d+00 * u(m,i,j,k-1)
|
||||
> + 6.0d+00 * u(m,i,j,k)
|
||||
> - 4.0d+00 * u(m,i,j,k+1)
|
||||
> + u(m,i,j,k+2) )
|
||||
end do
|
||||
endif
|
||||
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
! if (timeron) call timer_stop(t_rhsz)
|
||||
if (timeron) call timer_stop(t_rhs)
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,104 @@
|
||||
|
||||
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine setbv ()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! set the boundary values of dependent variables
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! local variables
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
include 'applu.incl'
|
||||
!---------------------------------------------------------------------
|
||||
integer i,j,k,m
|
||||
double precision temp1(5),temp2(5)
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! set the dependent variable values along the top and bottom faces
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r1(iEX0,iEX1,iEX2,iE
|
||||
!DVM$&X3) :: u
|
||||
!DVM$ PARALLEL (j,i) ON u(*,i,j,*), PRIVATE (m,i,j,temp2,temp1)
|
||||
do j = 1,ny
|
||||
do i = 1,nx
|
||||
call exact(i,j,1,temp1)
|
||||
call exact(i,j,nz,temp2)
|
||||
do m = 1,5
|
||||
u(m,i,j,1) = temp1(m)
|
||||
u(m,i,j,nz) = temp2(m)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! set the dependent variable values along north and south faces
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
|
||||
!DVM$& :: u
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r2(iEX0,iEX1,iEX2,iE
|
||||
!DVM$&X3) :: u
|
||||
!DVM$ PARALLEL (k,i) ON u(*,i,*,k), PRIVATE (m,i,temp2,k,temp1)
|
||||
do k = 1,nz
|
||||
do i = 1,nx
|
||||
call exact(i,1,k,temp1)
|
||||
call exact(i,ny,k,temp2)
|
||||
do m = 1,5
|
||||
u(m,i,1,k) = temp1(m)
|
||||
u(m,i,ny,k) = temp2(m)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! set the dependent variable values along east and west faces
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
|
||||
!DVM$& :: u
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r3(iEX0,iEX1,iEX2,iE
|
||||
!DVM$&X3) :: u
|
||||
!DVM$ PARALLEL (k,j) ON u(*,*,j,k), PRIVATE (m,j,temp2,k,temp1)
|
||||
do k = 1,nz
|
||||
do j = 1,ny
|
||||
call exact(1,j,k,temp1)
|
||||
call exact(nx,j,k,temp2)
|
||||
do m = 1,5
|
||||
u(m,1,j,k) = temp1(m)
|
||||
u(m,nx,j,k) = temp2(m)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
|
||||
!DVM$& :: u
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,166 @@
|
||||
|
||||
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine setcoeff ()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! local variables
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! set up coefficients
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
include 'applu.incl'
|
||||
!---------------------------------------------------------------------
|
||||
dxi = 1.0d+00 / (nx0 - 1)
|
||||
deta = 1.0d+00 / (ny0 - 1)
|
||||
dzeta = 1.0d+00 / (nz0 - 1)
|
||||
tx1 = 1.0d+00 / (dxi * dxi)
|
||||
tx2 = 1.0d+00 / (2.0d+00 * dxi)
|
||||
tx3 = 1.0d+00 / dxi
|
||||
ty1 = 1.0d+00 / (deta * deta)
|
||||
ty2 = 1.0d+00 / (2.0d+00 * deta)
|
||||
ty3 = 1.0d+00 / deta
|
||||
tz1 = 1.0d+00 / (dzeta * dzeta)
|
||||
tz2 = 1.0d+00 / (2.0d+00 * dzeta)
|
||||
tz3 = 1.0d+00 / dzeta
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! diffusion coefficients
|
||||
!---------------------------------------------------------------------
|
||||
dx1 = 0.75d+00
|
||||
dx2 = dx1
|
||||
dx3 = dx1
|
||||
dx4 = dx1
|
||||
dx5 = dx1
|
||||
dy1 = 0.75d+00
|
||||
dy2 = dy1
|
||||
dy3 = dy1
|
||||
dy4 = dy1
|
||||
dy5 = dy1
|
||||
dz1 = 1.00d+00
|
||||
dz2 = dz1
|
||||
dz3 = dz1
|
||||
dz4 = dz1
|
||||
dz5 = dz1
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! fourth difference dissipation
|
||||
!---------------------------------------------------------------------
|
||||
dssp = max (dx1,dy1,dz1) / 4.0d+00
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! coefficients of the exact solution to the first pde
|
||||
!---------------------------------------------------------------------
|
||||
ce(1,1) = 2.0d+00
|
||||
ce(1,2) = 0.0d+00
|
||||
ce(1,3) = 0.0d+00
|
||||
ce(1,4) = 4.0d+00
|
||||
ce(1,5) = 5.0d+00
|
||||
ce(1,6) = 3.0d+00
|
||||
ce(1,7) = 5.0d-01
|
||||
ce(1,8) = 2.0d-02
|
||||
ce(1,9) = 1.0d-02
|
||||
ce(1,10) = 3.0d-02
|
||||
ce(1,11) = 5.0d-01
|
||||
ce(1,12) = 4.0d-01
|
||||
ce(1,13) = 3.0d-01
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! coefficients of the exact solution to the second pde
|
||||
!---------------------------------------------------------------------
|
||||
ce(2,1) = 1.0d+00
|
||||
ce(2,2) = 0.0d+00
|
||||
ce(2,3) = 0.0d+00
|
||||
ce(2,4) = 0.0d+00
|
||||
ce(2,5) = 1.0d+00
|
||||
ce(2,6) = 2.0d+00
|
||||
ce(2,7) = 3.0d+00
|
||||
ce(2,8) = 1.0d-02
|
||||
ce(2,9) = 3.0d-02
|
||||
ce(2,10) = 2.0d-02
|
||||
ce(2,11) = 4.0d-01
|
||||
ce(2,12) = 3.0d-01
|
||||
ce(2,13) = 5.0d-01
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! coefficients of the exact solution to the third pde
|
||||
!---------------------------------------------------------------------
|
||||
ce(3,1) = 2.0d+00
|
||||
ce(3,2) = 2.0d+00
|
||||
ce(3,3) = 0.0d+00
|
||||
ce(3,4) = 0.0d+00
|
||||
ce(3,5) = 0.0d+00
|
||||
ce(3,6) = 2.0d+00
|
||||
ce(3,7) = 3.0d+00
|
||||
ce(3,8) = 4.0d-02
|
||||
ce(3,9) = 3.0d-02
|
||||
ce(3,10) = 5.0d-02
|
||||
ce(3,11) = 3.0d-01
|
||||
ce(3,12) = 5.0d-01
|
||||
ce(3,13) = 4.0d-01
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! coefficients of the exact solution to the fourth pde
|
||||
!---------------------------------------------------------------------
|
||||
ce(4,1) = 2.0d+00
|
||||
ce(4,2) = 2.0d+00
|
||||
ce(4,3) = 0.0d+00
|
||||
ce(4,4) = 0.0d+00
|
||||
ce(4,5) = 0.0d+00
|
||||
ce(4,6) = 2.0d+00
|
||||
ce(4,7) = 3.0d+00
|
||||
ce(4,8) = 3.0d-02
|
||||
ce(4,9) = 5.0d-02
|
||||
ce(4,10) = 4.0d-02
|
||||
ce(4,11) = 2.0d-01
|
||||
ce(4,12) = 1.0d-01
|
||||
ce(4,13) = 3.0d-01
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! coefficients of the exact solution to the fifth pde
|
||||
!---------------------------------------------------------------------
|
||||
ce(5,1) = 5.0d+00
|
||||
ce(5,2) = 4.0d+00
|
||||
ce(5,3) = 3.0d+00
|
||||
ce(5,4) = 2.0d+00
|
||||
ce(5,5) = 1.0d-01
|
||||
ce(5,6) = 4.0d-01
|
||||
ce(5,7) = 3.0d-01
|
||||
ce(5,8) = 5.0d-02
|
||||
ce(5,9) = 4.0d-02
|
||||
ce(5,10) = 3.0d-02
|
||||
ce(5,11) = 1.0d-01
|
||||
ce(5,12) = 3.0d-01
|
||||
ce(5,13) = 2.0d-01
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,82 @@
|
||||
|
||||
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine setiv ()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!
|
||||
! set the initial values of independent variables based on tri-linear
|
||||
! interpolation of boundary values in the computational space.
|
||||
!
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! local variables
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
include 'applu.incl'
|
||||
!---------------------------------------------------------------------
|
||||
integer i,j,k,m
|
||||
double precision xi,eta,zeta
|
||||
double precision pxi,peta,pzeta
|
||||
double precision ue_1jk(5),ue_nx0jk(5),ue_i1k(5),ue_iny0k(5),ue_i
|
||||
&j1(5),ue_ijnz(5)
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r0(iEX0,iEX1,iEX2,iE
|
||||
!DVM$&X3) :: u
|
||||
!DVM$ PARALLEL (k) ON u(*,*,*,k), PRIVATE (xi,m,peta,pxi,pzeta,i,j,eta,u
|
||||
!DVM$&e_ij1,zeta,ue_i1k,ue_iny0k,k,ue_1jk,ue_nx0jk,ue_ijnz)
|
||||
do k = 2,nz - 1
|
||||
zeta = dble (k - 1) / (nz - 1)
|
||||
do j = 2,ny - 1
|
||||
eta = dble (j - 1) / (ny0 - 1)
|
||||
do i = 2,nx - 1
|
||||
xi = dble (i - 1) / (nx0 - 1)
|
||||
call exact(1,j,k,ue_1jk)
|
||||
call exact(nx0,j,k,ue_nx0jk)
|
||||
call exact(i,1,k,ue_i1k)
|
||||
call exact(i,ny0,k,ue_iny0k)
|
||||
call exact(i,j,1,ue_ij1)
|
||||
call exact(i,j,nz,ue_ijnz)
|
||||
do m = 1,5
|
||||
pxi = (1.0d+00 - xi) * ue_1jk(m) + xi * ue_nx0jk(m)
|
||||
peta = (1.0d+00 - eta) * ue_i1k(m) + eta * ue_iny0k(m)
|
||||
pzeta = (1.0d+00 - zeta) * ue_ij1(m) + zeta * ue_ijnz(
|
||||
&m)
|
||||
u(m,i,j,k) = pxi + peta + pzeta - pxi * peta - peta *
|
||||
&pzeta - pzeta * pxi + pxi * peta * pzeta
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
|
||||
!DVM$& :: u
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,765 @@
|
||||
|
||||
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine ssor (niter)
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! to perform pseudo-time stepping SSOR iterations
|
||||
! for five nonlinear pde's.
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
integer :: niter
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
INCLUDE 'applu.incl'
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
integer :: i,j,k,m,n
|
||||
integer :: istep
|
||||
double precision :: tmp,tv(5),d_(5,5),a_(5,5),b_(5,5),c_(5,5)
|
||||
double precision :: delunm(5),rs(5)
|
||||
external timer_read
|
||||
double precision :: timer_read
|
||||
integer :: mod_522_2
|
||||
integer :: mod_522_1
|
||||
integer :: mod_522_0
|
||||
double precision :: tmp3
|
||||
double precision :: tmp2
|
||||
double precision :: tmp1
|
||||
double precision :: c34
|
||||
double precision :: c1345
|
||||
double precision :: r43
|
||||
integer :: j__3
|
||||
integer :: i__4
|
||||
integer :: d_15_14
|
||||
integer :: d_15_13
|
||||
integer :: ldx_14_12
|
||||
integer :: ldx_14_11
|
||||
integer :: ldy_13_10
|
||||
integer :: ldy_13_9
|
||||
integer :: ldz_12_8
|
||||
integer :: ldz_12_7
|
||||
integer :: v_11_6
|
||||
integer :: v_11_5
|
||||
double precision :: tv__15(5)
|
||||
double precision :: tmat(5,5)
|
||||
double precision :: tmp1__16
|
||||
double precision :: tmp__17
|
||||
integer :: m__18
|
||||
integer :: j__19
|
||||
integer :: i__20
|
||||
double precision :: tmp3__21
|
||||
double precision :: tmp2__22
|
||||
double precision :: tmp1__23
|
||||
double precision :: c34__24
|
||||
double precision :: c1345__25
|
||||
double precision :: r43__26
|
||||
integer :: j__27
|
||||
integer :: i__28
|
||||
integer :: udz_43_38
|
||||
integer :: udz_43_37
|
||||
integer :: udy_42_36
|
||||
integer :: udy_42_35
|
||||
integer :: udx_41_34
|
||||
integer :: udx_41_33
|
||||
integer :: d_40_32
|
||||
integer :: d_40_31
|
||||
integer :: v_38_30
|
||||
integer :: v_38_29
|
||||
double precision :: tmat__39(5,5)
|
||||
double precision :: tmp1__40
|
||||
double precision :: tmp__41
|
||||
integer :: m__42
|
||||
integer :: j__43
|
||||
integer :: i__44
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! begin pseudo-time stepping iterations
|
||||
!---------------------------------------------------------------------
|
||||
tmp = 1.0d+00 / (omega * (2.0d+00 - omega))
|
||||
do i = 1,11
|
||||
call timer_clear(i)
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! compute the steady-state residuals
|
||||
!---------------------------------------------------------------------
|
||||
call rhs()
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! compute the L2 norms of newton iteration residuals
|
||||
!---------------------------------------------------------------------
|
||||
call l2norm(isiz1,isiz2,isiz3,nx0,ny0,nz0,ist,iend,jst,jend,rsd,rs
|
||||
&dnm)
|
||||
|
||||
! if ( ipr .eq. 1 ) then
|
||||
! write (*,*) ' Initial residual norms'
|
||||
! write (*,*)
|
||||
! write (*,1007) ( rsdnm(m), m = 1, 5 )
|
||||
! write (*,'(/a)') 'Iteration RMS-residual of 5th PDE'
|
||||
! end if
|
||||
do i = 1,11
|
||||
call timer_clear(i)
|
||||
enddo
|
||||
call timer_start(1)
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! the timestep loop
|
||||
!---------------------------------------------------------------------
|
||||
do istep = 1,niter
|
||||
mod_522_0 = mod (istep,20)
|
||||
|
||||
! if ( ( mod ( istep, inorm ) .eq. 0 ) .and.
|
||||
! > ipr .eq. 1 ) then
|
||||
! write ( *, 1001 ) istep
|
||||
! end if
|
||||
if (mod_522_0 .eq. 0 .or. istep .eq. itmax .or. istep .eq. 1) t
|
||||
&hen
|
||||
if (niter .gt. 1) write (unit = *,fmt = 200) istep
|
||||
200 FORMAT(' Time step ', I4)
|
||||
endif
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! perform SSOR iteration
|
||||
!---------------------------------------------------------------------
|
||||
if (timeron) then
|
||||
call timer_start(5)
|
||||
endif
|
||||
!DVM$ INTERVAL 22
|
||||
if (timeron) then
|
||||
call timer_stop(5)
|
||||
endif
|
||||
r43 = 4.0d+00 / 3.0d+00
|
||||
c1345 = c1 * c3 * c4 * c5
|
||||
c34 = c3 * c4
|
||||
r43__26 = 4.0d+00 / 3.0d+00
|
||||
c1345__25 = c1 * c3 * c4 * c5
|
||||
c34__24 = c3 * c4
|
||||
!DVM$ REGION
|
||||
!DVM$ PARALLEL (k,j,i) ON rsd(*,i,j,k), PRIVATE (tmp3,tmp1,tmp2,tmat,
|
||||
!DVM$&k,tv,rs,rmk,rmj,rmi,ro),ACROSS (rsd(0:0,1:0,1:0,1:0)),CUDA_BLOCK (
|
||||
!DVM$&16,16)
|
||||
do k = 2,nz - 1
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
rmk = 1.0d+00 / u(1,i,j,k - 1)
|
||||
rmj = 1.0d+00 / u(1,i,j - 1,k)
|
||||
rmi = 1.0d+00 / u(1,i - 1,j,k)
|
||||
ro = 1.0d+00 / u(1,i,j,k)
|
||||
rs(1) = dt * rsd(1,i,j,k)
|
||||
rs(2) = dt * rsd(2,i,j,k)
|
||||
rs(3) = dt * rsd(3,i,j,k)
|
||||
rs(4) = dt * rsd(4,i,j,k)
|
||||
rs(5) = dt * rsd(5,i,j,k)
|
||||
rs(1) = rs(1) - omega * ((-(dt)) * tz1 * dz1 * rsd(1,i
|
||||
&,j,k - 1) + (-(dt)) * tz2 * rsd(4,i,j,k - 1))
|
||||
tv(1) = rs(1) - omega * ((-(dt)) * ty1 * dy1 * rsd(1,i
|
||||
&,j - 1,k) + (-(dt)) * tx1 * dx1 * rsd(1,i - 1,j,k) + (-(dt)) * tx2
|
||||
& * rsd(2,i - 1,j,k) + (-(dt)) * ty2 * rsd(3,i,j - 1,k) + 0.0d+00 *
|
||||
& rsd(3,i - 1,j,k) + 0.0d+00 * rsd(4,i - 1,j,k) + 0.0d+00 * rsd(5,i
|
||||
& - 1,j,k))
|
||||
tmat(1,1) = 1.0d+00 + dt * 2.0d+00 * (tx1 * dx1 + ty1
|
||||
&* dy1 + tz1 * dz1)
|
||||
tmat(1,2) = 0
|
||||
tmat(1,3) = 0
|
||||
tmat(1,4) = 0
|
||||
tmat(1,5) = 0
|
||||
rs(2) = rs(2) - omega * (((-(dt)) * tz2 * ((-(u(2,i,j,
|
||||
&k - 1) * u(4,i,j,k - 1))) * rmk * rmk) - dt * tz1 * ((-(c34)) * rm
|
||||
&k * rmk * u(2,i,j,k - 1))) * rsd(1,i,j,k - 1) + ((-(dt)) * tz2 * (
|
||||
&u(4,i,j,k - 1) * rmk) - dt * tz1 * c34 * rmk - dt * tz1 * dz2) * r
|
||||
&sd(2,i,j,k - 1) + (-(dt)) * tz2 * (u(2,i,j,k - 1) * rmk) * rsd(4,i
|
||||
&,j,k - 1))
|
||||
tv(2) = rs(2) - omega * (((-(dt)) * ty2 * ((-(u(2,i,j
|
||||
&- 1,k) * u(3,i,j - 1,k))) * rmj * rmj) - dt * ty1 * ((-(c34)) * rm
|
||||
&j * rmj * u(2,i,j - 1,k))) * rsd(1,i,j - 1,k) + ((-(dt)) * tx2 * (
|
||||
&(-((u(2,i - 1,j,k) * rmi)** 2)) + c2 * qs(i - 1,j,k) * rmi) - dt *
|
||||
& tx1 * ((-(r43)) * c34 * rmi * rmi * u(2,i - 1,j,k))) * rsd(1,i -
|
||||
&1,j,k) + ((-(dt)) * ty2 * (u(3,i,j - 1,k) * rmj) - dt * ty1 * (c34
|
||||
& * rmj) - dt * ty1 * dy2) * rsd(2,i,j - 1,k) + ((-(dt)) * tx2 * ((
|
||||
&2.0d+00 - c2) * (u(2,i - 1,j,k) * rmi)) - dt * tx1 * (r43 * c34 *
|
||||
&rmi) - dt * tx1 * dx2) * rsd(2,i - 1,j,k) + (-(dt)) * ty2 * (u(2,i
|
||||
&,j - 1,k) * rmj) * rsd(3,i,j - 1,k) + (-(dt)) * tx2 * ((-(c2)) * (
|
||||
&u(3,i - 1,j,k) * rmi)) * rsd(3,i - 1,j,k) + 0.0d+00 * rsd(4,i,j -
|
||||
&1,k) + (-(dt)) * tx2 * ((-(c2)) * (u(4,i - 1,j,k) * rmi)) * rsd(4,
|
||||
&i - 1,j,k) + 0.0d+00 * rsd(5,i,j - 1,k) + (-(dt)) * tx2 * c2 * rsd
|
||||
&(5,i - 1,j,k))
|
||||
tmat(2,1) = (-(dt)) * 2.0d+00 * (tx1 * r43 + ty1 + tz1
|
||||
&) * c34 * ro * ro * u(2,i,j,k)
|
||||
tmat(2,2) = 1.0d+00 + dt * 2.0d+00 * c34 * ro * (tx1 *
|
||||
& r43 + ty1 + tz1) + dt * 2.0d+00 * (tx1 * dx2 + ty1 * dy2 + tz1 *
|
||||
&dz2)
|
||||
tmat(2,3) = 0
|
||||
tmat(2,4) = 0
|
||||
tmat(2,5) = 0
|
||||
rs(3) = rs(3) - omega * (((-(dt)) * tz2 * ((-(u(3,i,j,
|
||||
&k - 1) * u(4,i,j,k - 1))) * rmk * rmk) - dt * tz1 * ((-(c34)) * rm
|
||||
&k * rmk * u(3,i,j,k - 1))) * rsd(1,i,j,k - 1) + ((-(dt)) * tz2 * (
|
||||
&u(4,i,j,k - 1) * rmk) - dt * tz1 * (c34 * rmk) - dt * tz1 * dz3) *
|
||||
& rsd(3,i,j,k - 1) + (-(dt)) * tz2 * (u(3,i,j,k - 1) * rmk) * rsd(4
|
||||
&,i,j,k - 1))
|
||||
tv(3) = rs(3) - omega * (((-(dt)) * ty2 * ((-((u(3,i,j
|
||||
& - 1,k) * rmj)** 2)) + c2 * (qs(i,j - 1,k) * rmj)) - dt * ty1 * ((
|
||||
&-(r43)) * c34 * rmj * rmj * u(3,i,j - 1,k))) * rsd(1,i,j - 1,k) +
|
||||
&((-(dt)) * tx2 * ((-(u(2,i - 1,j,k) * u(3,i - 1,j,k))) * rmi * rmi
|
||||
&) - dt * tx1 * ((-(c34)) * rmi * rmi * u(3,i - 1,j,k))) * rsd(1,i
|
||||
&- 1,j,k) + (-(dt)) * ty2 * ((-(c2)) * (u(2,i,j - 1,k) * rmj)) * rs
|
||||
&d(2,i,j - 1,k) + (-(dt)) * tx2 * (u(3,i - 1,j,k) * rmi) * rsd(2,i
|
||||
&- 1,j,k) + ((-(dt)) * ty2 * ((2.0d+00 - c2) * (u(3,i,j - 1,k) * rm
|
||||
&j)) - dt * ty1 * (r43 * c34 * rmj) - dt * ty1 * dy3) * rsd(3,i,j -
|
||||
& 1,k) + ((-(dt)) * tx2 * (u(2,i - 1,j,k) * rmi) - dt * tx1 * (c34
|
||||
&* rmi) - dt * tx1 * dx3) * rsd(3,i - 1,j,k) + (-(dt)) * ty2 * ((-(
|
||||
&c2)) * (u(4,i,j - 1,k) * rmj)) * rsd(4,i,j - 1,k) + 0.0d+00 * rsd(
|
||||
&4,i - 1,j,k) + (-(dt)) * ty2 * c2 * rsd(5,i,j - 1,k) + 0.0d+00 * r
|
||||
&sd(5,i - 1,j,k))
|
||||
tmat(3,1) = (-(dt)) * 2.0d+00 * (tx1 + ty1 * r43 + tz1
|
||||
&) * c34 * ro * ro * u(3,i,j,k)
|
||||
tmat(3,2) = 0
|
||||
tmat(3,3) = 1.0d+00 + dt * 2.0d+00 * c34 * ro * (tx1 +
|
||||
& ty1 * r43 + tz1) + dt * 2.0d+00 * (tx1 * dx3 + ty1 * dy3 + tz1 *
|
||||
&dz3)
|
||||
tmat(3,4) = 0
|
||||
tmat(3,5) = 0
|
||||
rs(4) = rs(4) - omega * (((-(dt)) * tz2 * ((-((u(4,i,j
|
||||
&,k - 1) * rmk)** 2)) + c2 * qs(i,j,k - 1) * rmk) - dt * tz1 * ((-(
|
||||
&r43)) * c34 * rmk * rmk * u(4,i,j,k - 1))) * rsd(1,i,j,k - 1) + (-
|
||||
&(dt)) * tz2 * ((-(c2)) * (u(2,i,j,k - 1) * rmk)) * rsd(2,i,j,k - 1
|
||||
&) + (-(dt)) * tz2 * ((-(c2)) * (u(3,i,j,k - 1) * rmk)) * rsd(3,i,j
|
||||
&,k - 1) + ((-(dt)) * tz2 * (2.0d+00 - c2) * (u(4,i,j,k - 1) * rmk)
|
||||
& - dt * tz1 * (r43 * c34 * rmk) - dt * tz1 * dz4) * rsd(4,i,j,k -
|
||||
&1) + (-(dt)) * tz2 * c2 * rsd(5,i,j,k - 1))
|
||||
tv(4) = rs(4) - omega * (((-(dt)) * ty2 * ((-(u(3,i,j
|
||||
&- 1,k) * u(4,i,j - 1,k))) * rmj * rmj) - dt * ty1 * ((-(c34)) * rm
|
||||
&j * rmj * u(4,i,j - 1,k))) * rsd(1,i,j - 1,k) + ((-(dt)) * tx2 * (
|
||||
&(-(u(2,i - 1,j,k) * u(4,i - 1,j,k))) * rmi * rmi) - dt * tx1 * ((-
|
||||
&(c34)) * rmi * rmi * u(4,i - 1,j,k))) * rsd(1,i - 1,j,k) + 0.0d+00
|
||||
& * rsd(2,i,j - 1,k) + (-(dt)) * tx2 * (u(4,i - 1,j,k) * rmi) * rsd
|
||||
&(2,i - 1,j,k) + (-(dt)) * ty2 * (u(4,i,j - 1,k) * rmj) * rsd(3,i,j
|
||||
& - 1,k) + 0.0d+00 * rsd(3,i - 1,j,k) + ((-(dt)) * ty2 * (u(3,i,j -
|
||||
& 1,k) * rmj) - dt * ty1 * (c34 * rmj) - dt * ty1 * dy4) * rsd(4,i,
|
||||
&j - 1,k) + ((-(dt)) * tx2 * (u(2,i - 1,j,k) * rmi) - dt * tx1 * (c
|
||||
&34 * rmi) - dt * tx1 * dx4) * rsd(4,i - 1,j,k) + 0.0d+00 * rsd(5,i
|
||||
&,j - 1,k) + 0.0d+00 * rsd(5,i - 1,j,k))
|
||||
tmat(4,1) = (-(dt)) * 2.0d+00 * (tx1 + ty1 + tz1 * r43
|
||||
&) * c34 * ro * ro * u(4,i,j,k)
|
||||
tmat(4,2) = 0
|
||||
tmat(4,3) = 0
|
||||
tmat(4,4) = 1.0d+00 + dt * 2.0d+00 * c34 * ro * (tx1 +
|
||||
& ty1 + tz1 * r43) + dt * 2.0d+00 * (tx1 * dx4 + ty1 * dy4 + tz1 *
|
||||
&dz4)
|
||||
tmat(4,5) = 0
|
||||
rs(5) = rs(5) - omega * (((-(dt)) * tz2 * ((c2 * 2.0d0
|
||||
& * qs(i,j,k - 1) - c1 * u(5,i,j,k - 1)) * u(4,i,j,k - 1) * rmk * r
|
||||
&mk) - dt * tz1 * ((-(c34 - c1345)) * rmk * rmk * rmk * u(2,i,j,k -
|
||||
& 1)** 2 - (c34 - c1345) * rmk * rmk * rmk * u(3,i,j,k - 1)** 2 - (
|
||||
&r43 * c34 - c1345) * rmk * rmk * rmk * u(4,i,j,k - 1)** 2 - c1345
|
||||
&* rmk * rmk * u(5,i,j,k - 1))) * rsd(1,i,j,k - 1) + ((-(dt)) * tz2
|
||||
& * ((-(c2)) * (u(2,i,j,k - 1) * u(4,i,j,k - 1)) * rmk * rmk) - dt
|
||||
&* tz1 * (c34 - c1345) * rmk * rmk * u(2,i,j,k - 1)) * rsd(2,i,j,k
|
||||
&- 1) + ((-(dt)) * tz2 * ((-(c2)) * (u(3,i,j,k - 1) * u(4,i,j,k - 1
|
||||
&)) * rmk * rmk) - dt * tz1 * (c34 - c1345) * rmk * rmk * u(3,i,j,k
|
||||
& - 1)) * rsd(3,i,j,k - 1) + ((-(dt)) * tz2 * (c1 * (u(5,i,j,k - 1)
|
||||
& * rmk) - c2 * (qs(i,j,k - 1) * rmk + u(4,i,j,k - 1) * u(4,i,j,k -
|
||||
& 1) * rmk * rmk)) - dt * tz1 * (r43 * c34 - c1345) * rmk * rmk * u
|
||||
&(4,i,j,k - 1)) * rsd(4,i,j,k - 1) + ((-(dt)) * tz2 * (c1 * (u(4,i,
|
||||
&j,k - 1) * rmk)) - dt * tz1 * c1345 * rmk - dt * tz1 * dz5) * rsd(
|
||||
&5,i,j,k - 1))
|
||||
tv(5) = rs(5) - omega * (((-(dt)) * ty2 * ((c2 * 2.0d0
|
||||
& * qs(i,j - 1,k) - c1 * u(5,i,j - 1,k)) * (u(3,i,j - 1,k) * rmj *
|
||||
&rmj)) - dt * ty1 * ((-(c34 - c1345)) * rmj * rmj * rmj * u(2,i,j -
|
||||
& 1,k)** 2 - (r43 * c34 - c1345) * rmj * rmj * rmj * u(3,i,j - 1,k)
|
||||
&** 2 - (c34 - c1345) * rmj * rmj * rmj * u(4,i,j - 1,k)** 2 - c134
|
||||
&5 * rmj * rmj * u(5,i,j - 1,k))) * rsd(1,i,j - 1,k) + ((-(dt)) * t
|
||||
&x2 * ((c2 * 2.0d0 * qs(i - 1,j,k) - c1 * u(5,i - 1,j,k)) * u(2,i -
|
||||
& 1,j,k) * rmi * rmi) - dt * tx1 * ((-(r43 * c34 - c1345)) * rmi *
|
||||
&rmi * rmi * u(2,i - 1,j,k)** 2 - (c34 - c1345) * rmi * rmi * rmi *
|
||||
& u(3,i - 1,j,k)** 2 - (c34 - c1345) * rmi * rmi * rmi * u(4,i - 1,
|
||||
&j,k)** 2 - c1345 * rmi * rmi * u(5,i - 1,j,k))) * rsd(1,i - 1,j,k)
|
||||
& + ((-(dt)) * ty2 * ((-(c2)) * (u(2,i,j - 1,k) * u(3,i,j - 1,k)) *
|
||||
& rmj * rmj) - dt * ty1 * (c34 - c1345) * rmj * rmj * u(2,i,j - 1,k
|
||||
&)) * rsd(2,i,j - 1,k) + ((-(dt)) * tx2 * (c1 * (u(5,i - 1,j,k) * r
|
||||
&mi) - c2 * (u(2,i - 1,j,k) * u(2,i - 1,j,k) * rmi * rmi + qs(i - 1
|
||||
&,j,k) * rmi)) - dt * tx1 * (r43 * c34 - c1345) * rmi * rmi * u(2,i
|
||||
& - 1,j,k)) * rsd(2,i - 1,j,k) + ((-(dt)) * ty2 * (c1 * (u(5,i,j -
|
||||
&1,k) * rmj) - c2 * (qs(i,j - 1,k) * rmj + u(3,i,j - 1,k) * u(3,i,j
|
||||
& - 1,k) * rmj * rmj)) - dt * ty1 * (r43 * c34 - c1345) * rmj * rmj
|
||||
& * u(3,i,j - 1,k)) * rsd(3,i,j - 1,k) + ((-(dt)) * tx2 * ((-(c2))
|
||||
&* (u(3,i - 1,j,k) * u(2,i - 1,j,k)) * rmi * rmi) - dt * tx1 * (c34
|
||||
& - c1345) * rmi * rmi * u(3,i - 1,j,k)) * rsd(3,i - 1,j,k) + ((-(d
|
||||
&t)) * ty2 * ((-(c2)) * (u(3,i,j - 1,k) * u(4,i,j - 1,k)) * rmj * r
|
||||
&mj) - dt * ty1 * (c34 - c1345) * rmj * rmj * u(4,i,j - 1,k)) * rsd
|
||||
&(4,i,j - 1,k) + ((-(dt)) * tx2 * ((-(c2)) * (u(4,i - 1,j,k) * u(2,
|
||||
&i - 1,j,k)) * rmi * rmi) - dt * tx1 * (c34 - c1345) * rmi * rmi *
|
||||
&u(4,i - 1,j,k)) * rsd(4,i - 1,j,k) + ((-(dt)) * ty2 * (c1 * (u(3,i
|
||||
&,j - 1,k) * rmj)) - dt * ty1 * c1345 * rmj - dt * ty1 * dy5) * rsd
|
||||
&(5,i,j - 1,k) + ((-(dt)) * tx2 * (c1 * (u(2,i - 1,j,k) * rmi)) - d
|
||||
&t * tx1 * c1345 * rmi - dt * tx1 * dx5) * rsd(5,i - 1,j,k))
|
||||
tmat(5,1) = (-(dt)) * 2.0d+00 * (((tx1 * (r43 * c34 -
|
||||
&c1345) + ty1 * (c34 - c1345) + tz1 * (c34 - c1345)) * u(2,i,j,k)**
|
||||
& 2 + (tx1 * (c34 - c1345) + ty1 * (r43 * c34 - c1345) + tz1 * (c34
|
||||
& - c1345)) * u(3,i,j,k)** 2 + (tx1 * (c34 - c1345) + ty1 * (c34 -
|
||||
&c1345) + tz1 * (r43 * c34 - c1345)) * u(4,i,j,k)** 2) * ro * ro *
|
||||
&ro + (tx1 + ty1 + tz1) * c1345 * ro * ro * u(5,i,j,k))
|
||||
tmat(5,2) = dt * 2.0d+00 * ro * ro * u(2,i,j,k) * (tx1
|
||||
& * (r43 * c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (c34 - c1345)
|
||||
&)
|
||||
tmat(5,3) = dt * 2.0d+00 * ro * ro * u(3,i,j,k) * (tx1
|
||||
& * (c34 - c1345) + ty1 * (r43 * c34 - c1345) + tz1 * (c34 - c1345)
|
||||
&)
|
||||
tmat(5,4) = dt * 2.0d+00 * ro * ro * u(4,i,j,k) * (tx1
|
||||
& * (c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (r43 * c34 - c1345)
|
||||
&)
|
||||
tmat(5,5) = 1.0d+00 + dt * 2.0d+00 * (tx1 + ty1 + tz1)
|
||||
& * c1345 * ro + dt * 2.0d+00 * (tx1 * dx5 + ty1 * dy5 + tz1 * dz5)
|
||||
tmp1 = 1.0d+00 / tmat(1,1)
|
||||
tmp2 = tmp1 * tmat(2,1)
|
||||
tmat(2,2) = tmat(2,2) - tmp2 * tmat(1,2)
|
||||
tmat(2,3) = tmat(2,3) - tmp2 * tmat(1,3)
|
||||
tmat(2,4) = tmat(2,4) - tmp2 * tmat(1,4)
|
||||
tmat(2,5) = tmat(2,5) - tmp2 * tmat(1,5)
|
||||
tv(2) = tv(2) - tv(1) * tmp2
|
||||
tmp2 = tmp1 * tmat(3,1)
|
||||
tmat(3,2) = tmat(3,2) - tmp2 * tmat(1,2)
|
||||
tmat(3,3) = tmat(3,3) - tmp2 * tmat(1,3)
|
||||
tmat(3,4) = tmat(3,4) - tmp2 * tmat(1,4)
|
||||
tmat(3,5) = tmat(3,5) - tmp2 * tmat(1,5)
|
||||
tv(3) = tv(3) - tv(1) * tmp2
|
||||
tmp2 = tmp1 * tmat(4,1)
|
||||
tmat(4,2) = tmat(4,2) - tmp2 * tmat(1,2)
|
||||
tmat(4,3) = tmat(4,3) - tmp2 * tmat(1,3)
|
||||
tmat(4,4) = tmat(4,4) - tmp2 * tmat(1,4)
|
||||
tmat(4,5) = tmat(4,5) - tmp2 * tmat(1,5)
|
||||
tv(4) = tv(4) - tv(1) * tmp2
|
||||
tmp2 = tmp1 * tmat(5,1)
|
||||
tmat(5,2) = tmat(5,2) - tmp2 * tmat(1,2)
|
||||
tmat(5,3) = tmat(5,3) - tmp2 * tmat(1,3)
|
||||
tmat(5,4) = tmat(5,4) - tmp2 * tmat(1,4)
|
||||
tmat(5,5) = tmat(5,5) - tmp2 * tmat(1,5)
|
||||
tv(5) = tv(5) - tv(1) * tmp2
|
||||
tmp1 = 1.0d+00 / tmat(2,2)
|
||||
tmp2 = tmp1 * tmat(3,2)
|
||||
tmat(3,3) = tmat(3,3) - tmp2 * tmat(2,3)
|
||||
tmat(3,4) = tmat(3,4) - tmp2 * tmat(2,4)
|
||||
tmat(3,5) = tmat(3,5) - tmp2 * tmat(2,5)
|
||||
tv(3) = tv(3) - tv(2) * tmp2
|
||||
tmp2 = tmp1 * tmat(4,2)
|
||||
tmat(4,3) = tmat(4,3) - tmp2 * tmat(2,3)
|
||||
tmat(4,4) = tmat(4,4) - tmp2 * tmat(2,4)
|
||||
tmat(4,5) = tmat(4,5) - tmp2 * tmat(2,5)
|
||||
tv(4) = tv(4) - tv(2) * tmp2
|
||||
tmp2 = tmp1 * tmat(5,2)
|
||||
tmat(5,3) = tmat(5,3) - tmp2 * tmat(2,3)
|
||||
tmat(5,4) = tmat(5,4) - tmp2 * tmat(2,4)
|
||||
tmat(5,5) = tmat(5,5) - tmp2 * tmat(2,5)
|
||||
tv(5) = tv(5) - tv(2) * tmp2
|
||||
tmp1 = 1.0d+00 / tmat(3,3)
|
||||
tmp2 = tmp1 * tmat(4,3)
|
||||
tmat(4,4) = tmat(4,4) - tmp2 * tmat(3,4)
|
||||
tmat(4,5) = tmat(4,5) - tmp2 * tmat(3,5)
|
||||
tv(4) = tv(4) - tv(3) * tmp2
|
||||
tmp2 = tmp1 * tmat(5,3)
|
||||
tmat(5,4) = tmat(5,4) - tmp2 * tmat(3,4)
|
||||
tmat(5,5) = tmat(5,5) - tmp2 * tmat(3,5)
|
||||
tv(5) = tv(5) - tv(3) * tmp2
|
||||
tmp1 = 1.0d+00 / tmat(4,4)
|
||||
tmp2 = tmp1 * tmat(5,4)
|
||||
tmat(5,5) = tmat(5,5) - tmp2 * tmat(4,5)
|
||||
tv(5) = tv(5) - tv(4) * tmp2
|
||||
rs(5) = tv(5) / tmat(5,5)
|
||||
tv(4) = tv(4) - tmat(4,5) * rs(5)
|
||||
rs(4) = tv(4) / tmat(4,4)
|
||||
tv(3) = tv(3) - tmat(3,4) * rs(4) - tmat(3,5) * rs(5)
|
||||
rs(3) = tv(3) / tmat(3,3)
|
||||
tv(2) = tv(2) - tmat(2,3) * rs(3) - tmat(2,4) * rs(4)
|
||||
&- tmat(2,5) * rs(5)
|
||||
rs(2) = tv(2) / tmat(2,2)
|
||||
tv(1) = tv(1) - tmat(1,2) * rs(2) - tmat(1,3) * rs(3)
|
||||
&- tmat(1,4) * rs(4) - tmat(1,5) * rs(5)
|
||||
rs(1) = tv(1) / tmat(1,1)
|
||||
rsd(1,i,j,k) = rs(1)
|
||||
rsd(2,i,j,k) = rs(2)
|
||||
rsd(3,i,j,k) = rs(3)
|
||||
rsd(4,i,j,k) = rs(4)
|
||||
rsd(5,i,j,k) = rs(5)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ PARALLEL (k,j,i) ON rsd(*,i,j,k), PRIVATE (tv,tmat,tmp2,tmp1,rm
|
||||
!DVM$&k,rmj,rmi,ro),ACROSS (rsd(0:0,0:1,0:1,0:1)),CUDA_BLOCK (16,16)
|
||||
do k = nz - 1,2,(-(1))
|
||||
do j = jend,jst,(-(1))
|
||||
do i = iend,ist,(-(1))
|
||||
rmk = 1.0d+00 / u(1,i,j,k + 1)
|
||||
rmj = 1.0d+00 / u(1,i,j + 1,k)
|
||||
rmi = 1.0d+00 / u(1,i + 1,j,k)
|
||||
ro = 1.0d+00 / u(1,i,j,k)
|
||||
tv(1) = omega * ((-(dt)) * tz1 * dz1 * rsd(1,i,j,k + 1
|
||||
&) + 0.0d+00 * rsd(2,i,j,k + 1) + 0.0d+00 * rsd(3,i,j,k + 1) + dt *
|
||||
& tz2 * rsd(4,i,j,k + 1) + 0.0d+00 * rsd(5,i,j,k + 1))
|
||||
tv(1) = tv(1) + omega * ((-(dt)) * ty1 * dy1 * rsd(1,i
|
||||
&,j + 1,k) + (-(dt)) * tx1 * dx1 * rsd(1,i + 1,j,k) + 0.0d+00 * rsd
|
||||
&(2,i,j + 1,k) + dt * tx2 * rsd(2,i + 1,j,k) + dt * ty2 * rsd(3,i,j
|
||||
& + 1,k) + 0.0d+00 * rsd(3,i + 1,j,k) + 0.0d+00 * rsd(4,i,j + 1,k)
|
||||
&+ 0.0d+00 * rsd(4,i + 1,j,k) + 0.0d+00 * rsd(5,i,j + 1,k) + 0.0d+0
|
||||
&0 * rsd(5,i + 1,j,k))
|
||||
tmat(1,1) = 1.0d+00 + dt * 2.0d+00 * (tx1 * dx1 + ty1
|
||||
&* dy1 + tz1 * dz1)
|
||||
tmat(1,2) = 0.0d+00
|
||||
tmat(1,3) = 0.0d+00
|
||||
tmat(1,4) = 0.0d+00
|
||||
tmat(1,5) = 0.0d+00
|
||||
tv(2) = omega * ((dt * tz2 * ((-(u(2,i,j,k + 1) * u(4,
|
||||
&i,j,k + 1))) * rmk * rmk) - dt * tz1 * ((-(c34__24)) * rmk * rmk *
|
||||
& u(2,i,j,k + 1))) * rsd(1,i,j,k + 1) + (dt * tz2 * (u(4,i,j,k + 1)
|
||||
& * rmk) - dt * tz1 * c34__24 * rmk - dt * tz1 * dz2) * rsd(2,i,j,k
|
||||
& + 1) + 0.0d+00 * rsd(3,i,j,k + 1) + dt * tz2 * (u(2,i,j,k + 1) *
|
||||
&rmk) * rsd(4,i,j,k + 1) + 0.0d+00 * rsd(5,i,j,k + 1))
|
||||
tv(2) = tv(2) + omega * ((dt * ty2 * ((-(u(2,i,j + 1,k
|
||||
&) * u(3,i,j + 1,k))) * rmj * rmj) - dt * ty1 * ((-(c34__24)) * rmj
|
||||
& * rmj * u(2,i,j + 1,k))) * rsd(1,i,j + 1,k) + (dt * tx2 * ((-((u(
|
||||
&2,i + 1,j,k) * rmi)** 2)) + c2 * qs(i + 1,j,k) * rmi) - dt * tx1 *
|
||||
& ((-(r43__26)) * c34__24 * rmi * rmi * u(2,i + 1,j,k))) * rsd(1,i
|
||||
&+ 1,j,k) + (dt * ty2 * (u(3,i,j + 1,k) * rmj) - dt * ty1 * (c34__2
|
||||
&4 * rmj) - dt * ty1 * dy2) * rsd(2,i,j + 1,k) + (dt * tx2 * ((2.0d
|
||||
&+00 - c2) * (u(2,i + 1,j,k) * rmi)) - dt * tx1 * (r43__26 * c34__2
|
||||
&4 * rmi) - dt * tx1 * dx2) * rsd(2,i + 1,j,k) + dt * ty2 * (u(2,i,
|
||||
&j + 1,k) * rmj) * rsd(3,i,j + 1,k) + dt * tx2 * ((-(c2)) * (u(3,i
|
||||
&+ 1,j,k) * rmi)) * rsd(3,i + 1,j,k) + 0.0d+00 * rsd(4,i,j + 1,k) +
|
||||
& dt * tx2 * ((-(c2)) * (u(4,i + 1,j,k) * rmi)) * rsd(4,i + 1,j,k)
|
||||
&+ 0.0d+00 * rsd(5,i,j + 1,k) + dt * tx2 * c2 * rsd(5,i + 1,j,k))
|
||||
tmat(2,1) = dt * 2.0d+00 * ((-(tx1)) * r43__26 - ty1 -
|
||||
& tz1) * (c34__24 * ro * ro * u(2,i,j,k))
|
||||
tmat(2,2) = 1.0d+00 + dt * 2.0d+00 * c34__24 * ro * (t
|
||||
&x1 * r43__26 + ty1 + tz1) + dt * 2.0d+00 * (tx1 * dx2 + ty1 * dy2
|
||||
&+ tz1 * dz2)
|
||||
tmat(2,3) = 0.0d+00
|
||||
tmat(2,4) = 0.0d+00
|
||||
tmat(2,5) = 0.0d+00
|
||||
tv(3) = omega * ((dt * tz2 * ((-(u(3,i,j,k + 1) * u(4,
|
||||
&i,j,k + 1))) * rmk * rmk) - dt * tz1 * ((-(c34__24)) * rmk * rmk *
|
||||
& u(3,i,j,k + 1))) * rsd(1,i,j,k + 1) + 0.0d+00 * rsd(2,i,j,k + 1)
|
||||
&+ (dt * tz2 * (u(4,i,j,k + 1) * rmk) - dt * tz1 * (c34__24 * rmk)
|
||||
&- dt * tz1 * dz3) * rsd(3,i,j,k + 1) + dt * tz2 * (u(3,i,j,k + 1)
|
||||
&* rmk) * rsd(4,i,j,k + 1) + 0.0d+00 * rsd(5,i,j,k + 1))
|
||||
tv(3) = tv(3) + omega * ((dt * ty2 * ((-((u(3,i,j + 1,
|
||||
&k) * rmj)** 2)) + c2 * (qs(i,j + 1,k) * rmj)) - dt * ty1 * ((-(r43
|
||||
&__26)) * c34__24 * rmj * rmj * u(3,i,j + 1,k))) * rsd(1,i,j + 1,k)
|
||||
& + (dt * tx2 * ((-(u(2,i + 1,j,k) * u(3,i + 1,j,k))) * rmi * rmi)
|
||||
&- dt * tx1 * ((-(c34__24)) * rmi * rmi * u(3,i + 1,j,k))) * rsd(1,
|
||||
&i + 1,j,k) + dt * ty2 * ((-(c2)) * (u(2,i,j + 1,k) * rmj)) * rsd(2
|
||||
&,i,j + 1,k) + dt * tx2 * (u(3,i + 1,j,k) * rmi) * rsd(2,i + 1,j,k)
|
||||
& + (dt * ty2 * ((2.0d+00 - c2) * (u(3,i,j + 1,k) * rmj)) - dt * ty
|
||||
&1 * (r43__26 * c34__24 * rmj) - dt * ty1 * dy3) * rsd(3,i,j + 1,k)
|
||||
& + (dt * tx2 * (u(2,i + 1,j,k) * rmi) - dt * tx1 * (c34__24 * rmi)
|
||||
& - dt * tx1 * dx3) * rsd(3,i + 1,j,k) + dt * ty2 * ((-(c2)) * (u(4
|
||||
&,i,j + 1,k) * rmj)) * rsd(4,i,j + 1,k) + 0.0d+00 * rsd(4,i + 1,j,k
|
||||
&) + dt * ty2 * c2 * rsd(5,i,j + 1,k) + 0.0d+00 * rsd(5,i + 1,j,k))
|
||||
tmat(3,1) = dt * 2.0d+00 * ((-(tx1)) - ty1 * r43__26 -
|
||||
& tz1) * (c34__24 * ro * ro * u(3,i,j,k))
|
||||
tmat(3,2) = 0.0d+00
|
||||
tmat(3,3) = 1.0d+00 + dt * 2.0d+00 * c34__24 * ro * (t
|
||||
&x1 + ty1 * r43__26 + tz1) + dt * 2.0d+00 * (tx1 * dx3 + ty1 * dy3
|
||||
&+ tz1 * dz3)
|
||||
tmat(3,4) = 0.0d+00
|
||||
tmat(3,5) = 0.0d+00
|
||||
tv(4) = omega * ((dt * tz2 * ((-((u(4,i,j,k + 1) * rmk
|
||||
&)** 2)) + c2 * (qs(i,j,k + 1) * rmk)) - dt * tz1 * ((-(r43__26)) *
|
||||
& c34__24 * rmk * rmk * u(4,i,j,k + 1))) * rsd(1,i,j,k + 1) + dt *
|
||||
&tz2 * ((-(c2)) * (u(2,i,j,k + 1) * rmk)) * rsd(2,i,j,k + 1) + dt *
|
||||
& tz2 * ((-(c2)) * (u(3,i,j,k + 1) * rmk)) * rsd(3,i,j,k + 1) + (dt
|
||||
& * tz2 * (2.0d+00 - c2) * (u(4,i,j,k + 1) * rmk) - dt * tz1 * (r43
|
||||
&__26 * c34__24 * rmk) - dt * tz1 * dz4) * rsd(4,i,j,k + 1) + dt *
|
||||
&tz2 * c2 * rsd(5,i,j,k + 1))
|
||||
tv(4) = tv(4) + omega * ((dt * ty2 * ((-(u(3,i,j + 1,k
|
||||
&) * u(4,i,j + 1,k))) * rmj * rmj) - dt * ty1 * ((-(c34__24)) * rmj
|
||||
& * rmj * u(4,i,j + 1,k))) * rsd(1,i,j + 1,k) + (dt * tx2 * ((-(u(2
|
||||
&,i + 1,j,k) * u(4,i + 1,j,k))) * rmi * rmi) - dt * tx1 * ((-(c34__
|
||||
&24)) * rmi * rmi * u(4,i + 1,j,k))) * rsd(1,i + 1,j,k) + 0.0d+00 *
|
||||
& rsd(2,i,j + 1,k) + dt * tx2 * (u(4,i + 1,j,k) * rmi) * rsd(2,i +
|
||||
&1,j,k) + dt * ty2 * (u(4,i,j + 1,k) * rmj) * rsd(3,i,j + 1,k) + 0.
|
||||
&0d+00 * rsd(3,i + 1,j,k) + (dt * ty2 * (u(3,i,j + 1,k) * rmj) - dt
|
||||
& * ty1 * (c34__24 * rmj) - dt * ty1 * dy4) * rsd(4,i,j + 1,k) + (d
|
||||
&t * tx2 * (u(2,i + 1,j,k) * rmi) - dt * tx1 * (c34__24 * rmi) - dt
|
||||
& * tx1 * dx4) * rsd(4,i + 1,j,k) + 0.0d+00 * rsd(5,i,j + 1,k) + 0.
|
||||
&0d+00 * rsd(5,i + 1,j,k))
|
||||
tmat(4,1) = dt * 2.0d+00 * ((-(tx1)) - ty1 - tz1 * r43
|
||||
&__26) * (c34__24 * ro * ro * u(4,i,j,k))
|
||||
tmat(4,2) = 0.0d+00
|
||||
tmat(4,3) = 0.0d+00
|
||||
tmat(4,4) = 1.0d+00 + dt * 2.0d+00 * c34__24 * ro * (t
|
||||
&x1 + ty1 + tz1 * r43__26) + dt * 2.0d+00 * (tx1 * dx4 + ty1 * dy4
|
||||
&+ tz1 * dz4)
|
||||
tmat(4,5) = 0.0d+00
|
||||
tv(5) = omega * ((dt * tz2 * ((c2 * 2.0d0 * qs(i,j,k +
|
||||
& 1) - c1 * u(5,i,j,k + 1)) * (u(4,i,j,k + 1) * rmk * rmk)) - dt *
|
||||
&tz1 * ((-(c34__24 - c1345__25)) * rmk * rmk * rmk * u(2,i,j,k + 1)
|
||||
&** 2 - (c34__24 - c1345__25) * rmk * rmk * rmk * u(3,i,j,k + 1)**
|
||||
&2 - (r43__26 * c34__24 - c1345__25) * rmk * rmk * rmk * u(4,i,j,k
|
||||
&+ 1)** 2 - c1345__25 * rmk * rmk * u(5,i,j,k + 1))) * rsd(1,i,j,k
|
||||
&+ 1) + (dt * tz2 * ((-(c2)) * (u(2,i,j,k + 1) * u(4,i,j,k + 1)) *
|
||||
&rmk * rmk) - dt * tz1 * (c34__24 - c1345__25) * rmk * rmk * u(2,i,
|
||||
&j,k + 1)) * rsd(2,i,j,k + 1) + (dt * tz2 * ((-(c2)) * (u(3,i,j,k +
|
||||
& 1) * u(4,i,j,k + 1)) * rmk * rmk) - dt * tz1 * (c34__24 - c1345__
|
||||
&25) * rmk * rmk * u(3,i,j,k + 1)) * rsd(3,i,j,k + 1) + (dt * tz2 *
|
||||
& (c1 * (u(5,i,j,k + 1) * rmk) - c2 * (qs(i,j,k + 1) * rmk + u(4,i,
|
||||
&j,k + 1) * u(4,i,j,k + 1) * rmk * rmk)) - dt * tz1 * (r43__26 * c3
|
||||
&4__24 - c1345__25) * rmk * rmk * u(4,i,j,k + 1)) * rsd(4,i,j,k + 1
|
||||
&) + (dt * tz2 * (c1 * (u(4,i,j,k + 1) * rmk)) - dt * tz1 * c1345__
|
||||
&25 * rmk - dt * tz1 * dz5) * rsd(5,i,j,k + 1))
|
||||
tv(5) = tv(5) + omega * ((dt * ty2 * ((c2 * 2.0d0 * qs
|
||||
&(i,j + 1,k) - c1 * u(5,i,j + 1,k)) * (u(3,i,j + 1,k) * rmj * rmj))
|
||||
& - dt * ty1 * ((-(c34__24 - c1345__25)) * rmj * rmj * rmj * u(2,i,
|
||||
&j + 1,k)** 2 - (r43__26 * c34__24 - c1345__25) * rmj * rmj * rmj *
|
||||
& u(3,i,j + 1,k)** 2 - (c34__24 - c1345__25) * rmj * rmj * rmj * u(
|
||||
&4,i,j + 1,k)** 2 - c1345__25 * rmj * rmj * u(5,i,j + 1,k))) * rsd(
|
||||
&1,i,j + 1,k) + (dt * tx2 * ((c2 * 2.0d0 * qs(i + 1,j,k) - c1 * u(5
|
||||
&,i + 1,j,k)) * (u(2,i + 1,j,k) * rmi * rmi)) - dt * tx1 * ((-(r43_
|
||||
&_26 * c34__24 - c1345__25)) * rmi * rmi * rmi * u(2,i + 1,j,k)** 2
|
||||
& - (c34__24 - c1345__25) * rmi * rmi * rmi * u(3,i + 1,j,k)** 2 -
|
||||
&(c34__24 - c1345__25) * rmi * rmi * rmi * u(4,i + 1,j,k)** 2 - c13
|
||||
&45__25 * rmi * rmi * u(5,i + 1,j,k))) * rsd(1,i + 1,j,k) + (dt * t
|
||||
&y2 * ((-(c2)) * (u(2,i,j + 1,k) * u(3,i,j + 1,k)) * rmj * rmj) - d
|
||||
&t * ty1 * (c34__24 - c1345__25) * rmj * rmj * u(2,i,j + 1,k)) * rs
|
||||
&d(2,i,j + 1,k) + (dt * tx2 * (c1 * (u(5,i + 1,j,k) * rmi) - c2 * (
|
||||
&u(2,i + 1,j,k) * u(2,i + 1,j,k) * rmi * rmi + qs(i + 1,j,k) * rmi)
|
||||
&) - dt * tx1 * (r43__26 * c34__24 - c1345__25) * rmi * rmi * u(2,i
|
||||
& + 1,j,k)) * rsd(2,i + 1,j,k) + (dt * ty2 * (c1 * (u(5,i,j + 1,k)
|
||||
&* rmj) - c2 * (qs(i,j + 1,k) * rmj + u(3,i,j + 1,k) * u(3,i,j + 1,
|
||||
&k) * rmj * rmj)) - dt * ty1 * (r43__26 * c34__24 - c1345__25) * rm
|
||||
&j * rmj * u(3,i,j + 1,k)) * rsd(3,i,j + 1,k) + (dt * tx2 * ((-(c2)
|
||||
&) * (u(3,i + 1,j,k) * u(2,i + 1,j,k)) * rmi * rmi) - dt * tx1 * (c
|
||||
&34__24 - c1345__25) * rmi * rmi * u(3,i + 1,j,k)) * rsd(3,i + 1,j,
|
||||
&k) + (dt * ty2 * ((-(c2)) * (u(3,i,j + 1,k) * u(4,i,j + 1,k)) * rm
|
||||
&j * rmj) - dt * ty1 * (c34__24 - c1345__25) * rmj * rmj * u(4,i,j
|
||||
&+ 1,k)) * rsd(4,i,j + 1,k) + (dt * tx2 * ((-(c2)) * (u(4,i + 1,j,k
|
||||
&) * u(2,i + 1,j,k)) * rmi * rmi) - dt * tx1 * (c34__24 - c1345__25
|
||||
&) * rmi * rmi * u(4,i + 1,j,k)) * rsd(4,i + 1,j,k) + (dt * ty2 * (
|
||||
&c1 * (u(3,i,j + 1,k) * rmj)) - dt * ty1 * c1345__25 * rmj - dt * t
|
||||
&y1 * dy5) * rsd(5,i,j + 1,k) + (dt * tx2 * (c1 * (u(2,i + 1,j,k) *
|
||||
& rmi)) - dt * tx1 * c1345__25 * rmi - dt * tx1 * dx5) * rsd(5,i +
|
||||
&1,j,k))
|
||||
tmat(5,1) = (-(dt)) * 2.0d+00 * (((tx1 * (r43__26 * c3
|
||||
&4__24 - c1345__25) + ty1 * (c34__24 - c1345__25) + tz1 * (c34__24
|
||||
&- c1345__25)) * u(2,i,j,k)** 2 + (tx1 * (c34__24 - c1345__25) + ty
|
||||
&1 * (r43__26 * c34__24 - c1345__25) + tz1 * (c34__24 - c1345__25))
|
||||
& * u(3,i,j,k)** 2 + (tx1 * (c34__24 - c1345__25) + ty1 * (c34__24
|
||||
&- c1345__25) + tz1 * (r43__26 * c34__24 - c1345__25)) * u(4,i,j,k)
|
||||
&** 2) * ro * ro * ro + (tx1 + ty1 + tz1) * c1345__25 * ro * ro * u
|
||||
&(5,i,j,k))
|
||||
tmat(5,2) = dt * 2.0d+00 * (tx1 * (r43__26 * c34__24 -
|
||||
& c1345__25) + ty1 * (c34__24 - c1345__25) + tz1 * (c34__24 - c1345
|
||||
&__25)) * ro * ro * u(2,i,j,k)
|
||||
tmat(5,3) = dt * 2.0d+00 * (tx1 * (c34__24 - c1345__25
|
||||
&) + ty1 * (r43__26 * c34__24 - c1345__25) + tz1 * (c34__24 - c1345
|
||||
&__25)) * ro * ro * u(3,i,j,k)
|
||||
tmat(5,4) = dt * 2.0d+00 * (tx1 * (c34__24 - c1345__25
|
||||
&) + ty1 * (c34__24 - c1345__25) + tz1 * (r43__26 * c34__24 - c1345
|
||||
&__25)) * ro * ro * u(4,i,j,k)
|
||||
tmat(5,5) = 1.0d+00 + dt * 2.0d+00 * (tx1 + ty1 + tz1)
|
||||
& * c1345__25 * ro + dt * 2.0d+00 * (tx1 * dx5 + ty1 * dy5 + tz1 *
|
||||
&dz5)
|
||||
tmp1 = 1.0d+00 / tmat(1,1)
|
||||
tmp2 = tmp1 * tmat(2,1)
|
||||
tmat(2,2) = tmat(2,2) - tmp2 * tmat(1,2)
|
||||
tmat(2,3) = tmat(2,3) - tmp2 * tmat(1,3)
|
||||
tmat(2,4) = tmat(2,4) - tmp2 * tmat(1,4)
|
||||
tmat(2,5) = tmat(2,5) - tmp2 * tmat(1,5)
|
||||
tv(2) = tv(2) - tv(1) * tmp2
|
||||
tmp2 = tmp1 * tmat(3,1)
|
||||
tmat(3,2) = tmat(3,2) - tmp2 * tmat(1,2)
|
||||
tmat(3,3) = tmat(3,3) - tmp2 * tmat(1,3)
|
||||
tmat(3,4) = tmat(3,4) - tmp2 * tmat(1,4)
|
||||
tmat(3,5) = tmat(3,5) - tmp2 * tmat(1,5)
|
||||
tv(3) = tv(3) - tv(1) * tmp2
|
||||
tmp2 = tmp1 * tmat(4,1)
|
||||
tmat(4,2) = tmat(4,2) - tmp2 * tmat(1,2)
|
||||
tmat(4,3) = tmat(4,3) - tmp2 * tmat(1,3)
|
||||
tmat(4,4) = tmat(4,4) - tmp2 * tmat(1,4)
|
||||
tmat(4,5) = tmat(4,5) - tmp2 * tmat(1,5)
|
||||
tv(4) = tv(4) - tv(1) * tmp2
|
||||
tmp2 = tmp1 * tmat(5,1)
|
||||
tmat(5,2) = tmat(5,2) - tmp2 * tmat(1,2)
|
||||
tmat(5,3) = tmat(5,3) - tmp2 * tmat(1,3)
|
||||
tmat(5,4) = tmat(5,4) - tmp2 * tmat(1,4)
|
||||
tmat(5,5) = tmat(5,5) - tmp2 * tmat(1,5)
|
||||
tv(5) = tv(5) - tv(1) * tmp2
|
||||
tmp1 = 1.0d+00 / tmat(2,2)
|
||||
tmp2 = tmp1 * tmat(3,2)
|
||||
tmat(3,3) = tmat(3,3) - tmp2 * tmat(2,3)
|
||||
tmat(3,4) = tmat(3,4) - tmp2 * tmat(2,4)
|
||||
tmat(3,5) = tmat(3,5) - tmp2 * tmat(2,5)
|
||||
tv(3) = tv(3) - tv(2) * tmp2
|
||||
tmp2 = tmp1 * tmat(4,2)
|
||||
tmat(4,3) = tmat(4,3) - tmp2 * tmat(2,3)
|
||||
tmat(4,4) = tmat(4,4) - tmp2 * tmat(2,4)
|
||||
tmat(4,5) = tmat(4,5) - tmp2 * tmat(2,5)
|
||||
tv(4) = tv(4) - tv(2) * tmp2
|
||||
tmp2 = tmp1 * tmat(5,2)
|
||||
tmat(5,3) = tmat(5,3) - tmp2 * tmat(2,3)
|
||||
tmat(5,4) = tmat(5,4) - tmp2 * tmat(2,4)
|
||||
tmat(5,5) = tmat(5,5) - tmp2 * tmat(2,5)
|
||||
tv(5) = tv(5) - tv(2) * tmp2
|
||||
tmp1 = 1.0d+00 / tmat(3,3)
|
||||
tmp2 = tmp1 * tmat(4,3)
|
||||
tmat(4,4) = tmat(4,4) - tmp2 * tmat(3,4)
|
||||
tmat(4,5) = tmat(4,5) - tmp2 * tmat(3,5)
|
||||
tv(4) = tv(4) - tv(3) * tmp2
|
||||
tmp2 = tmp1 * tmat(5,3)
|
||||
tmat(5,4) = tmat(5,4) - tmp2 * tmat(3,4)
|
||||
tmat(5,5) = tmat(5,5) - tmp2 * tmat(3,5)
|
||||
tv(5) = tv(5) - tv(3) * tmp2
|
||||
tmp1 = 1.0d+00 / tmat(4,4)
|
||||
tmp2 = tmp1 * tmat(5,4)
|
||||
tmat(5,5) = tmat(5,5) - tmp2 * tmat(4,5)
|
||||
tv(5) = tv(5) - tv(4) * tmp2
|
||||
tv(5) = tv(5) / tmat(5,5)
|
||||
tv(4) = tv(4) - tmat(4,5) * tv(5)
|
||||
tv(4) = tv(4) / tmat(4,4)
|
||||
tv(3) = tv(3) - tmat(3,4) * tv(4) - tmat(3,5) * tv(5)
|
||||
tv(3) = tv(3) / tmat(3,3)
|
||||
tv(2) = tv(2) - tmat(2,3) * tv(3) - tmat(2,4) * tv(4)
|
||||
&- tmat(2,5) * tv(5)
|
||||
tv(2) = tv(2) / tmat(2,2)
|
||||
tv(1) = tv(1) - tmat(1,2) * tv(2) - tmat(1,3) * tv(3)
|
||||
&- tmat(1,4) * tv(4) - tmat(1,5) * tv(5)
|
||||
tv(1) = tv(1) / tmat(1,1)
|
||||
rsd(1,i,j,k) = rsd(1,i,j,k) - tv(1)
|
||||
rsd(2,i,j,k) = rsd(2,i,j,k) - tv(2)
|
||||
rsd(3,i,j,k) = rsd(3,i,j,k) - tv(3)
|
||||
rsd(4,i,j,k) = rsd(4,i,j,k) - tv(4)
|
||||
rsd(5,i,j,k) = rsd(5,i,j,k) - tv(5)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! update the variables
|
||||
!---------------------------------------------------------------------
|
||||
! if (timeron) then
|
||||
! call timer_start(10)
|
||||
! endif
|
||||
!DVM$ PARALLEL (k,j,i,m) ON u(m,i,j,k), PRIVATE (j,m,i,k)
|
||||
do k = 1,nz
|
||||
do j = jst,jend
|
||||
do i = ist,iend
|
||||
do m = 1,5
|
||||
u(m,i,j,k) = u(m,i,j,k) + tmp * rsd(m,i,j,k)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ END REGION
|
||||
|
||||
! if (timeron) then
|
||||
! call timer_stop(10)
|
||||
! endif
|
||||
!---------------------------------------------------------------------
|
||||
! compute the steady-state residuals
|
||||
!---------------------------------------------------------------------
|
||||
!DVM$ END INTERVAL
|
||||
call rhs()
|
||||
mod_522_2 = mod (istep,inorm)
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! compute the max-norms of newton iteration residuals
|
||||
!---------------------------------------------------------------------
|
||||
if (mod_522_2 .eq. 0 .or. istep .eq. itmax) then
|
||||
if (timeron) then
|
||||
call timer_start(11)
|
||||
endif
|
||||
call l2norm(isiz1,isiz2,isiz3,nx0,ny0,nz0,ist,iend,jst,jend,
|
||||
&rsd,rsdnm)
|
||||
if (timeron) then
|
||||
call timer_stop(11)
|
||||
endif
|
||||
|
||||
! if ( ipr .eq. 1 ) then
|
||||
! write (*,1007) ( rsdnm(m), m = 1, 5 )
|
||||
! end if
|
||||
endif
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! check the newton-iteration residuals against the tolerance levels
|
||||
!---------------------------------------------------------------------
|
||||
if (rsdnm(1) .lt. tolrsd(1) .and. rsdnm(2) .lt. tolrsd(2) .and.
|
||||
& rsdnm(3) .lt. tolrsd(3) .and. rsdnm(4) .lt. tolrsd(4) .and. rsdnm
|
||||
&(5) .lt. tolrsd(5)) then
|
||||
|
||||
! if (ipr .eq. 1 ) then
|
||||
write (unit = *,fmt = 1004) istep
|
||||
|
||||
! end if
|
||||
goto 900
|
||||
endif
|
||||
enddo
|
||||
900 continue
|
||||
call timer_stop(1)
|
||||
maxtime = timer_read (1)
|
||||
return
|
||||
1001 FORMAT (1X/5X,'pseudo-time SSOR iteration no.=',I4/)
|
||||
1004 FORMAT (1X/1X,'convergence was achieved after ',I4, ' pseudo-tim
|
||||
&e steps' )
|
||||
1006 FORMAT (1X/1X,'RMS-norm of SSOR-iteration correction ', 'for first
|
||||
& pde = ',1PE12.5/, 1X,'RMS-norm of SSOR-iteration correction ', '
|
||||
&for second pde = ',1PE12.5/, 1X,'RMS-norm of SSOR-iteration correc
|
||||
&tion ', 'for third pde = ',1PE12.5/, 1X,'RMS-norm of SSOR-iterati
|
||||
&on correction ', 'for fourth pde = ',1PE12.5/, 1X,'RMS-norm of SSO
|
||||
&R-iteration correction ', 'for fifth pde = ',1PE12.5)
|
||||
1007 FORMAT (1X/1X,'RMS-norm of steady-state residual for ', 'first pde
|
||||
& = ',1PE12.5/, 1X,'RMS-norm of steady-state residual for ', 'seco
|
||||
&nd pde = ',1PE12.5/, 1X,'RMS-norm of steady-state residual for ',
|
||||
&'third pde = ',1PE12.5/, 1X,'RMS-norm of steady-state residual fo
|
||||
&r ', 'fourth pde = ',1PE12.5/, 1X,'RMS-norm of steady-state residu
|
||||
&al for ', 'fifth pde = ',1PE12.5)
|
||||
end
|
||||
|
||||
@@ -0,0 +1,97 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine timer_clear(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
integer n
|
||||
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
|
||||
elapsed(n) = 0.0
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine timer_start(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
external elapsed_time
|
||||
double precision elapsed_time
|
||||
integer n
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
|
||||
start(n) = elapsed_time()
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine timer_stop(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
external elapsed_time
|
||||
double precision elapsed_time
|
||||
integer n
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
double precision t, now
|
||||
now = elapsed_time()
|
||||
t = now - start(n)
|
||||
elapsed(n) = elapsed(n) + t
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
double precision function timer_read(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
integer n
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
|
||||
timer_read = elapsed(n)
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
double precision function elapsed_time()
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
|
||||
double precision dvtime
|
||||
elapsed_time = dvtime()
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,382 @@
|
||||
|
||||
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
subroutine verify (xcr, xce, xci, class, verified)
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! verification routine
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!--- applu.incl
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! npbparams.h defines parameters that depend on the class and
|
||||
! number of nodes
|
||||
!---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
!---------------------------------------------------------------------
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! end of include file
|
||||
include 'applu.incl'
|
||||
!---------------------------------------------------------------------
|
||||
double precision xcr(5),xce(5),xci
|
||||
double precision xcrref(5),xceref(5),xciref,xcrdif(5),xcedif(5),x
|
||||
&cidif,epsilon,dtref
|
||||
integer m
|
||||
character class
|
||||
logical verified
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! tolerance level
|
||||
!---------------------------------------------------------------------
|
||||
epsilon = 1.0d-08
|
||||
class = 'U'
|
||||
verified = .TRUE.
|
||||
do m = 1,5
|
||||
xcrref(m) = 1.0
|
||||
xceref(m) = 1.0
|
||||
enddo
|
||||
xciref = 1.0
|
||||
if (nx0 .eq. 12 .and. ny0 .eq. 12 .and. nz0 .eq. 12 .and. itmax .e
|
||||
&q. 50) then
|
||||
class = 'S'
|
||||
dtref = 5.0d-1
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of residual, for the (12X12X12) grid,
|
||||
! after 50 time steps, with DT = 5.0d-01
|
||||
!---------------------------------------------------------------------
|
||||
xcrref(1) = 1.6196343210976702d-02
|
||||
xcrref(2) = 2.1976745164821318d-03
|
||||
xcrref(3) = 1.5179927653399185d-03
|
||||
xcrref(4) = 1.5029584435994323d-03
|
||||
xcrref(5) = 3.4264073155896461d-02
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of solution error, for the (12X12X12) grid,
|
||||
! after 50 time steps, with DT = 5.0d-01
|
||||
!---------------------------------------------------------------------
|
||||
xceref(1) = 6.4223319957960924d-04
|
||||
xceref(2) = 8.4144342047347926d-05
|
||||
xceref(3) = 5.8588269616485186d-05
|
||||
xceref(4) = 5.8474222595157350d-05
|
||||
xceref(5) = 1.3103347914111294d-03
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference value of surface integral, for the (12X12X12) grid,
|
||||
! after 50 time steps, with DT = 5.0d-01
|
||||
!---------------------------------------------------------------------
|
||||
xciref = 7.8418928865937083d+00
|
||||
else if (nx0 .eq. 33 .and. ny0 .eq. 33 .and. nz0 .eq. 33 .and. itm
|
||||
&ax .eq. 300) then
|
||||
|
||||
!SPEC95fp size
|
||||
class = 'W'
|
||||
dtref = 1.5d-3
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of residual, for the (33x33x33) grid,
|
||||
! after 300 time steps, with DT = 1.5d-3
|
||||
!---------------------------------------------------------------------
|
||||
xcrref(1) = 0.1236511638192d+02
|
||||
xcrref(2) = 0.1317228477799d+01
|
||||
xcrref(3) = 0.2550120713095d+01
|
||||
xcrref(4) = 0.2326187750252d+01
|
||||
xcrref(5) = 0.2826799444189d+02
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of solution error, for the (33X33X33) grid,
|
||||
!---------------------------------------------------------------------
|
||||
xceref(1) = 0.4867877144216d+00
|
||||
xceref(2) = 0.5064652880982d-01
|
||||
xceref(3) = 0.9281818101960d-01
|
||||
xceref(4) = 0.8570126542733d-01
|
||||
xceref(5) = 0.1084277417792d+01
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference value of surface integral, for the (33X33X33) grid,
|
||||
! after 300 time steps, with DT = 1.5d-3
|
||||
!---------------------------------------------------------------------
|
||||
xciref = 0.1161399311023d+02
|
||||
else if (nx0 .eq. 64 .and. ny0 .eq. 64 .and. nz0 .eq. 64 .and. itm
|
||||
&ax .eq. 250) then
|
||||
class = 'A'
|
||||
dtref = 2.0d+0
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of residual, for the (64X64X64) grid,
|
||||
! after 250 time steps, with DT = 2.0d+00
|
||||
!---------------------------------------------------------------------
|
||||
xcrref(1) = 7.7902107606689367d+02
|
||||
xcrref(2) = 6.3402765259692870d+01
|
||||
xcrref(3) = 1.9499249727292479d+02
|
||||
xcrref(4) = 1.7845301160418537d+02
|
||||
xcrref(5) = 1.8384760349464247d+03
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of solution error, for the (64X64X64) grid,
|
||||
! after 250 time steps, with DT = 2.0d+00
|
||||
!---------------------------------------------------------------------
|
||||
xceref(1) = 2.9964085685471943d+01
|
||||
xceref(2) = 2.8194576365003349d+00
|
||||
xceref(3) = 7.3473412698774742d+00
|
||||
xceref(4) = 6.7139225687777051d+00
|
||||
xceref(5) = 7.0715315688392578d+01
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference value of surface integral, for the (64X64X64) grid,
|
||||
! after 250 time steps, with DT = 2.0d+00
|
||||
!---------------------------------------------------------------------
|
||||
xciref = 2.6030925604886277d+01
|
||||
else if (nx0 .eq. 102 .and. ny0 .eq. 102 .and. nz0 .eq. 102 .and.
|
||||
&itmax .eq. 250) then
|
||||
class = 'B'
|
||||
dtref = 2.0d+0
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of residual, for the (102X102X102) grid,
|
||||
! after 250 time steps, with DT = 2.0d+00
|
||||
!---------------------------------------------------------------------
|
||||
xcrref(1) = 3.5532672969982736d+03
|
||||
xcrref(2) = 2.6214750795310692d+02
|
||||
xcrref(3) = 8.8333721850952190d+02
|
||||
xcrref(4) = 7.7812774739425265d+02
|
||||
xcrref(5) = 7.3087969592545314d+03
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of solution error, for the (102X102X102)
|
||||
! grid, after 250 time steps, with DT = 2.0d+00
|
||||
!---------------------------------------------------------------------
|
||||
xceref(1) = 1.1401176380212709d+02
|
||||
xceref(2) = 8.1098963655421574d+00
|
||||
xceref(3) = 2.8480597317698308d+01
|
||||
xceref(4) = 2.5905394567832939d+01
|
||||
xceref(5) = 2.6054907504857413d+02
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference value of surface integral, for the (102X102X102) grid,
|
||||
! after 250 time steps, with DT = 2.0d+00
|
||||
!---------------------------------------------------------------------
|
||||
xciref = 4.7887162703308227d+01
|
||||
else if (nx0 .eq. 162 .and. ny0 .eq. 162 .and. nz0 .eq. 162 .and.
|
||||
&itmax .eq. 250) then
|
||||
class = 'C'
|
||||
dtref = 2.0d+0
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of residual, for the (162X162X162) grid,
|
||||
! after 250 time steps, with DT = 2.0d+00
|
||||
!---------------------------------------------------------------------
|
||||
xcrref(1) = 1.03766980323537846d+04
|
||||
xcrref(2) = 8.92212458801008552d+02
|
||||
xcrref(3) = 2.56238814582660871d+03
|
||||
xcrref(4) = 2.19194343857831427d+03
|
||||
xcrref(5) = 1.78078057261061185d+04
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of solution error, for the (162X162X162)
|
||||
! grid, after 250 time steps, with DT = 2.0d+00
|
||||
!---------------------------------------------------------------------
|
||||
xceref(1) = 2.15986399716949279d+02
|
||||
xceref(2) = 1.55789559239863600d+01
|
||||
xceref(3) = 5.41318863077207766d+01
|
||||
xceref(4) = 4.82262643154045421d+01
|
||||
xceref(5) = 4.55902910043250358d+02
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference value of surface integral, for the (162X162X162) grid,
|
||||
! after 250 time steps, with DT = 2.0d+00
|
||||
!---------------------------------------------------------------------
|
||||
xciref = 6.66404553572181300d+01
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference value of surface integral, for the (162X162X162) grid,
|
||||
! after 250 time steps, with DT = 2.0d+00
|
||||
!---------------------------------------------------------------------
|
||||
xciref = 6.66404553572181300d+01
|
||||
else if (nx0 .eq. 408 .and. ny0 .eq. 408 .and. nz0 .eq. 408 .and.
|
||||
&itmax .eq. 300) then
|
||||
class = 'D'
|
||||
dtref = 1.0d+0
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of residual, for the (408X408X408) grid,
|
||||
! after 300 time steps, with DT = 1.0d+00
|
||||
!---------------------------------------------------------------------
|
||||
xcrref(1) = 0.4868417937025d+05
|
||||
xcrref(2) = 0.4696371050071d+04
|
||||
xcrref(3) = 0.1218114549776d+05
|
||||
xcrref(4) = 0.1033801493461d+05
|
||||
xcrref(5) = 0.7142398413817d+05
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of solution error, for the (408X408X408)
|
||||
! grid, after 300 time steps, with DT = 1.0d+00
|
||||
!---------------------------------------------------------------------
|
||||
xceref(1) = 0.3752393004482d+03
|
||||
xceref(2) = 0.3084128893659d+02
|
||||
xceref(3) = 0.9434276905469d+02
|
||||
xceref(4) = 0.8230686681928d+02
|
||||
xceref(5) = 0.7002620636210d+03
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference value of surface integral, for the (408X408X408) grid,
|
||||
! after 300 time steps, with DT = 1.0d+00
|
||||
!---------------------------------------------------------------------
|
||||
xciref = 0.8334101392503d+02
|
||||
else if (nx0 .eq. 1020 .and. ny0 .eq. 1020 .and. nz0 .eq. 1020 .an
|
||||
&d. itmax .eq. 300) then
|
||||
class = 'E'
|
||||
dtref = 0.5d+0
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of residual, for the (1020X1020X1020) grid,
|
||||
! after 300 time steps, with DT = 0.5d+00
|
||||
!---------------------------------------------------------------------
|
||||
xcrref(1) = 0.2099641687874d+06
|
||||
xcrref(2) = 0.2130403143165d+05
|
||||
xcrref(3) = 0.5319228789371d+05
|
||||
xcrref(4) = 0.4509761639833d+05
|
||||
xcrref(5) = 0.2932360006590d+06
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference values of RMS-norms of solution error, for the (1020X1020X1020)
|
||||
! grid, after 300 time steps, with DT = 0.5d+00
|
||||
!---------------------------------------------------------------------
|
||||
xceref(1) = 0.4800572578333d+03
|
||||
xceref(2) = 0.4221993400184d+02
|
||||
xceref(3) = 0.1210851906824d+03
|
||||
xceref(4) = 0.1047888986770d+03
|
||||
xceref(5) = 0.8363028257389d+03
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Reference value of surface integral, for the (1020X1020X1020) grid,
|
||||
! after 300 time steps, with DT = 0.5d+00
|
||||
!---------------------------------------------------------------------
|
||||
xciref = 0.9512163272273d+02
|
||||
else
|
||||
verified = .FALSE.
|
||||
endif
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! verification test for residuals if gridsize is one of
|
||||
! the defined grid sizes above (class .ne. 'U')
|
||||
!---------------------------------------------------------------------
|
||||
!---------------------------------------------------------------------
|
||||
! Compute the difference of solution values and the known reference values.
|
||||
!---------------------------------------------------------------------
|
||||
do m = 1,5
|
||||
xcrdif(m) = dabs ((xcr(m) - xcrref(m)) / xcrref(m))
|
||||
xcedif(m) = dabs ((xce(m) - xceref(m)) / xceref(m))
|
||||
enddo
|
||||
xcidif = dabs ((xci - xciref) / xciref)
|
||||
|
||||
!---------------------------------------------------------------------
|
||||
! Output the comparison of computed results to known cases.
|
||||
!---------------------------------------------------------------------
|
||||
if (class .ne. 'U') then
|
||||
write (unit = *,fmt = 1990) class
|
||||
1990 format(/, ' Verification being performed for class ', a
|
||||
&)
|
||||
write (unit = *,fmt = 2000) epsilon
|
||||
2000 format(' Accuracy setting for epsilon = ', E20.13)
|
||||
verified = dabs (dt - dtref) .le. epsilon
|
||||
if (.not.(verified)) then
|
||||
class = 'U'
|
||||
write (unit = *,fmt = 1000) dtref
|
||||
1000 format(' DT does not match the reference value
|
||||
& of ', E15.8)
|
||||
endif
|
||||
else
|
||||
write (unit = *,fmt = 1995)
|
||||
1995 format(' Unknown class')
|
||||
endif
|
||||
if (class .ne. 'U') then
|
||||
write (unit = *,fmt = 2001)
|
||||
else
|
||||
write (unit = *,fmt = 2005)
|
||||
endif
|
||||
2001 format(' Comparison of RMS-norms of residual')
|
||||
2005 format(' RMS-norms of residual')
|
||||
do m = 1,5
|
||||
if (class .eq. 'U') then
|
||||
write (unit = *,fmt = 2015) m,xcr(m)
|
||||
else if (xcrdif(m) .le. epsilon) then
|
||||
write (unit = *,fmt = 2011) m,xcr(m),xcrref(m),xcrdif(m)
|
||||
else
|
||||
verified = .FALSE.
|
||||
write (unit = *,fmt = 2010) m,xcr(m),xcrref(m),xcrdif(m)
|
||||
endif
|
||||
enddo
|
||||
if (class .ne. 'U') then
|
||||
write (unit = *,fmt = 2002)
|
||||
else
|
||||
write (unit = *,fmt = 2006)
|
||||
endif
|
||||
2002 format(' Comparison of RMS-norms of solution error')
|
||||
2006 format(' RMS-norms of solution error')
|
||||
do m = 1,5
|
||||
if (class .eq. 'U') then
|
||||
write (unit = *,fmt = 2015) m,xce(m)
|
||||
else if (xcedif(m) .le. epsilon) then
|
||||
write (unit = *,fmt = 2011) m,xce(m),xceref(m),xcedif(m)
|
||||
else
|
||||
verified = .FALSE.
|
||||
write (unit = *,fmt = 2010) m,xce(m),xceref(m),xcedif(m)
|
||||
endif
|
||||
enddo
|
||||
2010 format(' FAILURE: ', i2, 2x, E20.13, E20.13, E20.13)
|
||||
2011 format(' ', i2, 2x, E20.13, E20.13, E20.13)
|
||||
2015 format(' ', i2, 2x, E20.13)
|
||||
if (class .ne. 'U') then
|
||||
write (unit = *,fmt = 2025)
|
||||
else
|
||||
write (unit = *,fmt = 2026)
|
||||
endif
|
||||
2025 format(' Comparison of surface integral')
|
||||
2026 format(' Surface integral')
|
||||
if (class .eq. 'U') then
|
||||
write (unit = *,fmt = 2030) xci
|
||||
else if (xcidif .le. epsilon) then
|
||||
write (unit = *,fmt = 2032) xci,xciref,xcidif
|
||||
else
|
||||
verified = .FALSE.
|
||||
write (unit = *,fmt = 2031) xci,xciref,xcidif
|
||||
endif
|
||||
2030 format(' ', 4x, E20.13)
|
||||
2031 format(' FAILURE: ', 4x, E20.13, E20.13, E20.13)
|
||||
2032 format(' ', 4x, E20.13, E20.13, E20.13)
|
||||
if (class .eq. 'U') then
|
||||
write (unit = *,fmt = 2022)
|
||||
write (unit = *,fmt = 2023)
|
||||
2022 format(' No reference values provided')
|
||||
2023 format(' No verification performed')
|
||||
else if (verified) then
|
||||
write (unit = *,fmt = 2020)
|
||||
2020 format(' Verification Successful')
|
||||
else
|
||||
write (unit = *,fmt = 2021)
|
||||
2021 format(' Verification failed')
|
||||
endif
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,31 @@
|
||||
SHELL=/bin/sh
|
||||
BENCHMARK=mg
|
||||
BENCHMARKU=MG
|
||||
|
||||
include ../config/make.def
|
||||
include ../sys/make.common
|
||||
|
||||
SOURCES = mg.fdv \
|
||||
mg3p.fdv \
|
||||
comm3.fdv \
|
||||
interp.fdv \
|
||||
norm2u3.fdv \
|
||||
psinv.fdv \
|
||||
resid.fdv \
|
||||
rjrp3.fdv \
|
||||
setupDVM.fdv \
|
||||
utilities.fdv \
|
||||
zran3.fdv
|
||||
|
||||
OBJS = ${SOURCES:.fdv=.o}
|
||||
|
||||
${PROGRAM}: config $(OBJS)
|
||||
${FLINK} -o ${PROGRAM} ${OBJS}
|
||||
|
||||
%.o: %.fdv npbparams.h globals.h dvmvars.h
|
||||
${F77} ${FFLAGS} -c -o $@ $<
|
||||
|
||||
clean:
|
||||
rm -f npbparams.h
|
||||
rm -f *.o *~
|
||||
rm -f *.cu *.cuf *.c *.f
|
||||
@@ -0,0 +1,12 @@
|
||||
@echo off
|
||||
|
||||
set CLASS=%1
|
||||
set OPT=%2
|
||||
|
||||
CALL ..\sys\setparams MG %CLASS%
|
||||
CALL %F77% %OPT% mg 1>out_%CLASS%.txt 2>err_%CLASS%.txt
|
||||
if exist mg.exe (
|
||||
copy mg.exe %BIN%\mg.%CLASS%.x.exe
|
||||
del mg.exe
|
||||
)
|
||||
|
||||
@@ -0,0 +1,88 @@
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>.
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>.
|
||||
c---------------------------------------------------------------------
|
||||
c @param double precission :: u(n1 ,n2 ,n3) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param integer :: kk - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
subroutine comm3(u,n1,n2,n3,kk)
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ INHERIT u
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c comm3 organizes the communication on all borders
|
||||
c---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
include 'globals.h'
|
||||
|
||||
integer n1, n2, n3, kk, i1, i2, i3
|
||||
integer blockX, blockY
|
||||
double precision u(n1,n2,n3)
|
||||
!DVM$ interval 5
|
||||
!DVM$ REGION
|
||||
!, REMOTE_ACCESS (u(n1-1,:,:))
|
||||
!DVM$ PARALLEL (i3,i2) ON u(1,i2,i3)
|
||||
CDVM$& ,cuda_block(128)
|
||||
do i3=2,n3-1
|
||||
do i2=2,n2-1
|
||||
u(1,i2,i3) = u(n1-1,i2,i3)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!, REMOTE_ACCESS (u(2,:,:))
|
||||
!DVM$ PARALLEL (i3,i2) ON u(n1,i2,i3 )
|
||||
CDVM$& ,cuda_block(128)
|
||||
do i3=2,n3-1
|
||||
do i2=2,n2-1
|
||||
u(n1,i2,i3) = u(2,i2,i3)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
c----------------------------------
|
||||
!, REMOTE_ACCESS (u(:,n2-1,:))
|
||||
|
||||
!DVM$ PARALLEL (i3,i1) ON u(i1,1,i3)
|
||||
CDVM$& ,cuda_block(128)
|
||||
do i3=2,n3-1
|
||||
do i1=1,n1
|
||||
u(i1,1,i3) = u(i1,n2-1,i3)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
! , REMOTE_ACCESS (u(:,2,:))
|
||||
!DVM$ PARALLEL (i3,i1) ON u(i1,n2,i3)
|
||||
CDVM$& ,cuda_block(128)
|
||||
do i3=2,n3-1
|
||||
do i1=1,n1
|
||||
u(i1,n2,i3) = u(i1,2,i3)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
c----------------------------------
|
||||
|
||||
!, REMOTE_ACCESS (u(:,:,n3-1))
|
||||
!DVM$ PARALLEL (i2,i1) ON u(i1,i2,1)
|
||||
CDVM$& ,cuda_block(128)
|
||||
do i2=1,n2
|
||||
do i1=1,n1
|
||||
u(i1,i2,1) = u(i1,i2,n3-1)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!, REMOTE_ACCESS (u(:,:,2))
|
||||
!DVM$ PARALLEL (i2,i1) ON u(i1,i2,n3)
|
||||
CDVM$& ,cuda_block(128)
|
||||
do i2=1,n2
|
||||
do i1=1,n1
|
||||
u(i1,i2,n3) = u(i1,i2,2)
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ END REGION
|
||||
!DVM$ end interval
|
||||
if (timeron) call timer_stop(T_comm3)
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,57 @@
|
||||
c---------------------------------------------------------------------
|
||||
c FDVM specifications
|
||||
c---------------------------------------------------------------------
|
||||
integer p_u_ir(maxlevel) !p_u_ir(k) ≡ u(ir(k))
|
||||
integer p_r_ir(maxlevel) !p_u_ir(k) ≡ r(ir(k))
|
||||
integer pu !pu ≡ u(lt) ≡ p_u_ir(lt)
|
||||
integer pr !pr ≡ r(lt) ≡ p_r_ir(lt)
|
||||
integer pv !pv ≡ v
|
||||
|
||||
integer p_curr_u_k !p_curr_u_k ≡ u(k)
|
||||
integer p_curr_u_j !p_curr_u_j ≡ u(j)
|
||||
|
||||
integer p_curr_r_k !p_curr_r_k ≡ r(k)
|
||||
integer p_curr_r_j !p_curr_r_j ≡ r(j)
|
||||
|
||||
common /pointers/ p_u_ir, p_r_ir
|
||||
common /pointers/ pu, pr, pv
|
||||
common /pointers/ p_curr_r_j, p_curr_r_k
|
||||
common /pointers/ p_curr_u_j, p_curr_u_k
|
||||
|
||||
CDVM$ DOUBLE PRECISION, POINTER(:,:,:) :: p_u_ir, p_r_ir,
|
||||
CDVM$& pu, pr, pv,
|
||||
CDVM$& p_curr_r_j, p_curr_r_k,
|
||||
CDVM$& p_curr_u_j, p_curr_u_k
|
||||
|
||||
CDVM$ ALIGN :: pu, pr, pv,
|
||||
CDVM$& p_u_ir, p_r_ir,
|
||||
CDVM$& p_curr_r_k, p_curr_r_j,
|
||||
CDVM$& p_curr_u_k, p_curr_u_j
|
||||
|
||||
CDVM$ DYNAMIC p_u_ir, p_r_ir,
|
||||
CDVM$& pu, pr, pv,
|
||||
CDVM$& p_curr_r_j, p_curr_r_k,
|
||||
CDVM$& p_curr_u_j, p_curr_u_k
|
||||
|
||||
CDVM$ SHADOW pu(1:1,1:1,1:1)
|
||||
CDVM$ SHADOW pr(1:1,1:1,1:1)
|
||||
|
||||
CDVM$ SHADOW p_curr_r_k(1:1,1:1,1:1)
|
||||
CDVM$ SHADOW p_curr_u_k(1:1,1:1,1:1)
|
||||
CDVM$ SHADOW p_curr_u_j(1:1,1:1,1:1)
|
||||
|
||||
CDVM$ TEMPLATE EXT (nv1, nv2, nv3)
|
||||
CDVM$ DISTRIBUTE EXT (BLOCK, BLOCK, BLOCK)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Distribution from programm
|
||||
c---------------------------------------------------------------------
|
||||
double precision u(nr), r(nr)
|
||||
double precision v(nv)
|
||||
|
||||
common /noautom/ u,r,v
|
||||
CDVM$ HEAP u, r, v
|
||||
|
||||
double precision a(0:3), c(0:3)
|
||||
common /coefficients/ a,c
|
||||
CDVM$ DISTRIBUTE (*) :: a, c
|
||||
@@ -0,0 +1,68 @@
|
||||
c---------------------------------------------------------------------
|
||||
c Parameter lm (declared and set in "npbparams.h") is the log-base2 of
|
||||
c the edge size max for the partition on a given node, so must be changed
|
||||
c either to save space (if running a small case) or made bigger for larger
|
||||
c cases, for example, 512^3. Thus lm=7 means that the largest dimension
|
||||
c of a partition that can be solved on a node is 2^7 = 128. lm is set
|
||||
c automatically in npbparams.h
|
||||
c Parameters ndim1, ndim2, ndim3 are the local problem dimensions.
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'npbparams.h'
|
||||
|
||||
integer nm ! actual dimension including ghost cells for communications
|
||||
c *** type of nv, nr and ir is set in npbparams.h
|
||||
c > , nv ! size of rhs array
|
||||
c > , nr ! size of residual array
|
||||
> , nm2 ! size of communication buffer
|
||||
> , maxlevel! maximum number of levels
|
||||
|
||||
integer nv1, nv2, nv3
|
||||
parameter( nv1=one*(2+2**ndim1) )
|
||||
parameter( nv2=one*(2+2**ndim2) )
|
||||
parameter( nv3=one*(2+2**ndim3) )
|
||||
|
||||
parameter( nm=2+2**lm, maxlevel=(lt_default+1) )
|
||||
parameter( nm2=2*nm*nm)
|
||||
parameter( nv=nv1*nv2*nv3/one/one )
|
||||
parameter( nr = ((nv+nm**2+5*nm+7*lm+6)/7)*8 )
|
||||
c---------------------------------------------------------------------
|
||||
integer nbr(3,-1:1,maxlevel), msg_type(3,-1:1)
|
||||
integer msg_id(3,-1:1,2),nx(maxlevel),ny(maxlevel),nz(maxlevel)
|
||||
common /mg3/ nbr,msg_type,msg_id,nx,ny,nz
|
||||
|
||||
character class
|
||||
common /ClassType/class
|
||||
|
||||
integer debug_vec(0:7)
|
||||
common /my_debug/ debug_vec
|
||||
|
||||
integer m1(maxlevel), m2(maxlevel), m3(maxlevel)
|
||||
integer lt, lb
|
||||
common /fap/ ir(maxlevel),m1,m2,m3,lt,lb
|
||||
|
||||
logical ver
|
||||
! FALSE for GPU and TRUE for CPU
|
||||
parameter (ver = .false. )
|
||||
logical dead(maxlevel), give_ex(3,maxlevel), take_ex(3,maxlevel)
|
||||
common /comm_ex/ dead, give_ex, take_ex
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Set at m=1024, can handle cases up to 1024^3 case
|
||||
c---------------------------------------------------------------------
|
||||
integer m
|
||||
c parameter( m=1037 )
|
||||
parameter( m=nm+1 )
|
||||
|
||||
double precision buff(nm2,4)
|
||||
common /buffer/ buff
|
||||
|
||||
logical timeron
|
||||
common /timers/ timeron
|
||||
integer T_init, T_bench, T_psinv, T_resid, T_rprj3, T_interp,
|
||||
> T_norm2, T_mg3P, T_resid2, T_comm3, T_last
|
||||
parameter (T_init=1, T_bench=2, T_mg3P=3,
|
||||
> T_psinv=4, T_resid=5, T_resid2=6, T_rprj3=7,
|
||||
> T_interp=8, T_norm2=9, T_comm3=10, T_last=10)
|
||||
|
||||
|
||||
@@ -0,0 +1,169 @@
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c u(h) = u(h) + Q u(H)
|
||||
c <20><><EFBFBD> H = 2h - <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>,
|
||||
c Q - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c u - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>,
|
||||
c <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c @param double precission :: z(mm1,mm2,mm3) ? u(H) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param double precission :: u(n1 ,n2 ,n3 ) ? u(h) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param integer :: k - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
subroutine interp( z,mm1,mm2,mm3,u,n1,n2,n3,k )
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ INHERIT z,u
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c interp adds the trilinear interpolation of the correction
|
||||
c from the coarser grid to the current approximation: u = u + Qu'
|
||||
c
|
||||
c Observe that this implementation costs 16A + 4M, where
|
||||
c A and M denote the costs of Addition and Multiplication.
|
||||
c Note that this vectorizes, and is also fine for cache
|
||||
c based machines. Vector machines may get slightly better
|
||||
c performance however, with 8 separate "do i1" loops, rather than 4.
|
||||
c---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
include 'globals.h'
|
||||
|
||||
integer mm1, mm2, mm3, n1, n2, n3,k
|
||||
double precision z(mm1,mm2,mm3),u(n1,n2,n3),z1,z1_p1,z2,z2_p1
|
||||
double precision z3,z3_p1,z4,z4_p1
|
||||
integer i3, i2, i1, d1, d2, d3, t1, t2, t3
|
||||
!DVM$ interval 1
|
||||
if( n1 .ne. 3 .and. n2 .ne. 3 .and. n3 .ne. 3 ) then
|
||||
if ( ver ) then
|
||||
|
||||
!DVM$ REGION
|
||||
!DVM$ PARALLEL (i3,i2) ON u(*, 2*i2, 2*i3),
|
||||
!DVM$& SHADOW_RENEW(z(CORNER)), cuda_block(32,6)
|
||||
!DVM$& ,private(i1,z1,z1_p1, z2,z2_p1, z3,z3_p1, z4,z4_p1)
|
||||
do i3=1,mm3-1
|
||||
do i2=1,mm2-1
|
||||
z1 = z(1,i2,i3)
|
||||
z2 = z(1,i2+1,i3+1)
|
||||
z3 = z(1,i2,i3+1)
|
||||
z4 = z(1,i2+1,i3)
|
||||
do i1=1, mm1-1
|
||||
z1_p1 = z(i1+1,i2,i3)
|
||||
z2_p1 = z(i1+1,i2+1,i3+1)
|
||||
z3_p1 = z(i1+1,i2,i3+1)
|
||||
z4_p1 = z(i1+1,i2+1,i3)
|
||||
u(2*i1-1,2*i2-1,2*i3-1) = u(2*i1-1,2*i2-1,2*i3-1) + z1
|
||||
u(2*i1,2*i2-1,2*i3-1) = u(2*i1,2*i2-1,2*i3-1) +
|
||||
& 0.5d0 * ( z1_p1 + z1 )
|
||||
u(2*i1-1,2*i2,2*i3-1) = u(2*i1-1,2*i2,2*i3-1) +
|
||||
& 0.5d0 * ( z4 + z1 )
|
||||
u(2*i1,2*i2,2*i3-1) = u(2*i1,2*i2,2*i3-1) +
|
||||
& 0.25d0*( z4 + z1 + z4_p1 + z1_p1)
|
||||
u(2*i1-1,2*i2-1,2*i3) = u(2*i1-1,2*i2-1,2*i3) +
|
||||
& +0.5d0 * ( z3 + z1 )
|
||||
u(2*i1,2*i2-1,2*i3) = u(2*i1,2*i2-1,2*i3) +
|
||||
& 0.25d0*( z3 + z1 + z3_p1 + z1_p1)
|
||||
u(2*i1-1,2*i2,2*i3) = u(2*i1-1,2*i2,2*i3) +
|
||||
& 0.25d0* (z2 + z3 + z4 + z1 )
|
||||
u(2*i1,2*i2,2*i3) = u(2*i1,2*i2,2*i3) +
|
||||
& 0.125d0*( z2 + z3 + z4 + z1 + z2_p1 + z3_p1
|
||||
& + z4_p1 + z1_p1 )
|
||||
z1 = z1_p1
|
||||
z2 = z2_p1
|
||||
z3 = z3_p1
|
||||
z4 = z4_p1
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ END REGION
|
||||
|
||||
else
|
||||
!DVM$ REGION
|
||||
!DVM$ PARALLEL (i3,i1) ON u(2*i1, *, 2*i3),
|
||||
!DVM$& SHADOW_RENEW(z(CORNER)), cuda_block(32,6)
|
||||
!DVM$& ,private(i2,z1,z1_p1, z2,z2_p1, z3,z3_p1, z4,z4_p1)
|
||||
do i3=1,mm3-1
|
||||
do i1=1, mm1-1
|
||||
z1 = z(i1,1,i3)
|
||||
z2 = z(i1+1,1,i3)
|
||||
z3 = z(i1,1,i3+1)
|
||||
z4 = z(i1+1,1,i3+1)
|
||||
do i2=1,mm2-1
|
||||
z1_p1 = z(i1,i2+1,i3)
|
||||
z2_p1 = z(i1+1,i2+1,i3)
|
||||
z3_p1 = z(i1,i2+1,i3+1)
|
||||
z4_p1 = z(i1+1,i2+1,i3+1)
|
||||
u(2*i1-1,2*i2-1,2*i3-1) = u(2*i1-1,2*i2-1,2*i3-1) + z1
|
||||
u(2*i1,2*i2-1,2*i3-1) = u(2*i1,2*i2-1,2*i3-1) +
|
||||
& 0.5d0 * ( z2 + z1 )
|
||||
u(2*i1-1,2*i2,2*i3-1) = u(2*i1-1,2*i2,2*i3-1) +
|
||||
& 0.5d0 * ( z1_p1 + z1 )
|
||||
u(2*i1,2*i2,2*i3-1) = u(2*i1,2*i2,2*i3-1) +
|
||||
& 0.25d0*( z1_p1 + z1 + z2_p1 + z2)
|
||||
u(2*i1-1,2*i2-1,2*i3) = u(2*i1-1,2*i2-1,2*i3) +
|
||||
& +0.5d0 * ( z3 + z1 )
|
||||
u(2*i1,2*i2-1,2*i3) = u(2*i1,2*i2-1,2*i3) +
|
||||
& 0.25d0*( z3 + z1 + z4 + z2)
|
||||
u(2*i1-1,2*i2,2*i3) = u(2*i1-1,2*i2,2*i3) +
|
||||
& 0.25d0* (z3_p1 + z3 + z1_p1 + z1 )
|
||||
u(2*i1,2*i2,2*i3) = u(2*i1,2*i2,2*i3) +
|
||||
& 0.125d0*( z3_p1 + z3 + z1_p1 + z1 +
|
||||
& z4_p1 + z4 + z2_p1 + z2 )
|
||||
z1 = z1_p1
|
||||
z2 = z2_p1
|
||||
z3 = z3_p1
|
||||
z4 = z4_p1
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ END REGION
|
||||
endif
|
||||
|
||||
else
|
||||
|
||||
if(n1.eq.3) then; d1 = 2; t1 = 1; else; d1 = 1; t1 = 0; endif
|
||||
if(n2.eq.3) then; d2 = 2; t2 = 1; else; d2 = 1; t2 = 0; endif
|
||||
if(n3.eq.3) then; d3 = 2; t3 = 1; else; d3 = 1; t3 = 0; endif
|
||||
!DVM$ REGION
|
||||
!DVM$ PARALLEL (i3,i2,i1) ON u(2*i1-d1,2*i2-d2,2*i3-d3),
|
||||
!DVM$& SHADOW_RENEW(z(CORNER)), PRIVATE(i3,i2,i1)
|
||||
!DVM$& ,cuda_block(32,6,1)
|
||||
do i3=d3,mm3-1
|
||||
do i2=d2,mm2-1
|
||||
do i1=d1,mm1-1
|
||||
u(2*i1-d1,2*i2-d2,2*i3-d3)=u(2*i1-d1,2*i2-d2,2*i3-d3)
|
||||
& +z(i1,i2,i3)
|
||||
u(2*i1-t1,2*i2-d2,2*i3-d3)=u(2*i1-t1,2*i2-d2,2*i3-d3)
|
||||
& +0.5D0*(z(i1+1,i2,i3)+z(i1,i2,i3))
|
||||
u(2*i1-d1,2*i2-t2,2*i3-d3)=u(2*i1-d1,2*i2-t2,2*i3-d3)
|
||||
& +0.5D0*(z(i1,i2+1,i3)+z(i1,i2,i3))
|
||||
u(2*i1-t1,2*i2-t2,2*i3-d3)=u(2*i1-t1,2*i2-t2,2*i3-d3)
|
||||
& +0.25D0*(z(i1+1,i2+1,i3)+z(i1+1,i2,i3)
|
||||
& +z(i1, i2+1,i3)+z(i1, i2,i3))
|
||||
u(2*i1-d1,2*i2-d2,2*i3-t3)=u(2*i1-d1,2*i2-d2,2*i3-t3)
|
||||
& +0.5D0*(z(i1,i2,i3+1)+z(i1,i2,i3))
|
||||
u(2*i1-t1,2*i2-d2,2*i3-t3)=u(2*i1-t1,2*i2-d2,2*i3-t3)
|
||||
& +0.25D0*(z(i1+1,i2,i3+1)+z(i1,i2,i3+1)
|
||||
& +z(i1+1,i2,i3 )+z(i1,i2,i3 ))
|
||||
u(2*i1-d1,2*i2-t2,2*i3-t3)=u(2*i1-d1,2*i2-t2,2*i3-t3)
|
||||
& +0.25D0*(z(i1,i2+1,i3+1)+z(i1,i2,i3+1)
|
||||
& +z(i1,i2+1,i3 )+z(i1,i2,i3 ))
|
||||
u(2*i1-t1,2*i2-t2,2*i3-t3)=u(2*i1-t1,2*i2-t2,2*i3-t3)
|
||||
& +0.125D0*(z(i1+1,i2+1,i3+1)+z(i1+1,i2,i3+1)
|
||||
& +z(i1 ,i2+1,i3+1)+z(i1 ,i2,i3+1)
|
||||
& +z(i1+1,i2+1,i3 )+z(i1+1,i2,i3 )
|
||||
& +z(i1 ,i2+1,i3 )+z(i1 ,i2,i3 ))
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ END REGION
|
||||
endif
|
||||
!DVM$ end interval
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,369 @@
|
||||
!-------------------------------------------------------------------------!
|
||||
! !
|
||||
! N A S P A R A L L E L B E N C H M A R K S 3.3 !
|
||||
! !
|
||||
! D V M V E R S I O N !
|
||||
! !
|
||||
! M G !
|
||||
! !
|
||||
!-------------------------------------------------------------------------!
|
||||
! !
|
||||
! This benchmark is an OpenMP version of the NPB MG code. !
|
||||
! It is described in NAS Technical Report 99-011. !
|
||||
! !
|
||||
! Permission to use, copy, distribute and modify this software !
|
||||
! for any purpose with or without fee is hereby granted. We !
|
||||
! request, however, that all derived work reference the NAS !
|
||||
! Parallel Benchmarks 3.3. This software is provided "as is" !
|
||||
! without express or implied warranty. !
|
||||
! !
|
||||
! Information on NPB 3.3, including the technical report, the !
|
||||
! original specifications, source code, results and information !
|
||||
! on how to submit new results, is available at: !
|
||||
! !
|
||||
! http://www.nas.nasa.gov/Software/NPB/ !
|
||||
! !
|
||||
! Send comments or suggestions to npb@nas.nasa.gov !
|
||||
! !
|
||||
! NAS Parallel Benchmarks Group !
|
||||
! NASA Ames Research Center !
|
||||
! Mail Stop: T27A-1 !
|
||||
! Moffett Field, CA 94035-1000 !
|
||||
! !
|
||||
! E-mail: npb@nas.nasa.gov !
|
||||
! Fax: (650) 604-3957 !
|
||||
! !
|
||||
!-------------------------------------------------------------------------!
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c
|
||||
c Authors:
|
||||
c Original:
|
||||
c E. Barszcz
|
||||
c P. Frederickson
|
||||
c A. Woo
|
||||
c M. Yarrow
|
||||
c H. Jin
|
||||
c DVM/DVMH vesion:
|
||||
c A. Shubert
|
||||
c Optimized for DVM/DVMH:
|
||||
c A. Kolganov
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Программа решающая уравнение Пуасона многосеточным методом (V-цикл)
|
||||
c---------------------------------------------------------------------
|
||||
program mg
|
||||
c---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
include 'globals.h'
|
||||
include 'dvmvars.h'
|
||||
|
||||
c---------------------------------------------------------------------------c
|
||||
c k is the current level. It is passed down through subroutine args
|
||||
c and is NOT global. it is the current iteration
|
||||
c---------------------------------------------------------------------------c
|
||||
|
||||
integer k, it, pdim, pi
|
||||
|
||||
external timer_read
|
||||
double precision t, tinit, mflops, timer_read
|
||||
|
||||
c---------------------------------------------------------------------------c
|
||||
c These arrays are in common because they are quite large
|
||||
c and probably shouldn't be allocated on the stack. They
|
||||
c are always passed as subroutine args.
|
||||
c---------------------------------------------------------------------------c
|
||||
|
||||
double precision rnm2, rnmu, epsilon
|
||||
integer n1, n2, n3, nit
|
||||
double precision nn, verify_value, err, t_1,t_2
|
||||
logical verified
|
||||
|
||||
integer i, fstatus
|
||||
character t_names(t_last)*8
|
||||
double precision tmax, elapsed_time
|
||||
|
||||
do i = T_init, T_last
|
||||
call timer_clear(i)
|
||||
end do
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Read in and broadcast input data
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
open(unit=7,file='timer.flag', status='old', iostat=fstatus)
|
||||
if (fstatus .eq. 0) then
|
||||
timeron = .true.
|
||||
t_names(t_init) = 'init'
|
||||
t_names(t_bench) = 'benchmk'
|
||||
t_names(t_mg3P) = 'mg3P'
|
||||
t_names(t_psinv) = 'psinv'
|
||||
t_names(t_resid) = 'resid'
|
||||
t_names(t_rprj3) = 'rprj3'
|
||||
t_names(t_interp) = 'interp'
|
||||
t_names(t_norm2) = 'norm2'
|
||||
t_names(t_comm3) = 'comm3'
|
||||
close(7)
|
||||
else
|
||||
timeron = .false.
|
||||
endif
|
||||
|
||||
write (*, 1000)
|
||||
|
||||
open(unit=7,file="mg.input", status="old", iostat=fstatus)
|
||||
if (fstatus .eq. 0) then
|
||||
write(*,50)
|
||||
50 format(' Reading from input file mg.input')
|
||||
read(7,*) lt
|
||||
read(7,*) nx(lt), ny(lt), nz(lt)
|
||||
read(7,*) nit
|
||||
read(7,*) (debug_vec(i),i=0,7)
|
||||
else
|
||||
write(*,51)
|
||||
51 format(' No input file. Using compiled defaults ')
|
||||
lt = lt_default
|
||||
nit = nit_default
|
||||
nx(lt) = nx_default
|
||||
ny(lt) = ny_default
|
||||
nz(lt) = nz_default
|
||||
do i = 0,7
|
||||
debug_vec(i) = debug_default
|
||||
end do
|
||||
endif
|
||||
|
||||
|
||||
if ( (nx(lt) .ne. ny(lt)) .or. (nx(lt) .ne. nz(lt)) ) then
|
||||
Class = 'U'
|
||||
else if( nx(lt) .eq. 32 .and. nit .eq. 4 ) then
|
||||
Class = 'S'
|
||||
else if( nx(lt) .eq. 128 .and. nit .eq. 4 ) then
|
||||
Class = 'W'
|
||||
else if( nx(lt) .eq. 256 .and. nit .eq. 4 ) then
|
||||
Class = 'A'
|
||||
else if( nx(lt) .eq. 256 .and. nit .eq. 20 ) then
|
||||
Class = 'B'
|
||||
else if( nx(lt) .eq. 512 .and. nit .eq. 20 ) then
|
||||
Class = 'C'
|
||||
else if( nx(lt) .eq. 1024 .and. nit .eq. 50 ) then
|
||||
Class = 'D'
|
||||
else if( nx(lt) .eq. 2048 .and. nit .eq. 50 ) then
|
||||
Class = 'E'
|
||||
else
|
||||
Class = 'U'
|
||||
endif
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Use these for debug info:
|
||||
c---------------------------------------------------------------------
|
||||
c debug_vec(0) = 1 !=> report all norms
|
||||
c debug_vec(1) = 1 !=> some setup information
|
||||
c debug_vec(1) = 2 !=> more setup information
|
||||
c debug_vec(2) = k => at level k or below, show result of resid
|
||||
c debug_vec(3) = k => at level k or below, show result of psinv
|
||||
c debug_vec(4) = k => at level k or below, show result of rprj
|
||||
c debug_vec(5) = k => at level k or below, show result of interp
|
||||
c debug_vec(6) = 1 => (unused)
|
||||
c debug_vec(7) = 1 => (unused)
|
||||
c---------------------------------------------------------------------
|
||||
a(0) = -8.0D0/3.0D0
|
||||
a(1) = 0.0D0
|
||||
a(2) = 1.0D0/6.0D0
|
||||
a(3) = 1.0D0/12.0D0
|
||||
|
||||
if(Class .eq. 'A' .or. Class .eq. 'S'.or. Class .eq.'W') then
|
||||
c---------------------------------------------------------------------
|
||||
c Coefficients for the S(a) smoother
|
||||
c---------------------------------------------------------------------
|
||||
c(0) = -3.0D0/8.0D0
|
||||
c(1) = +1.0D0/32.0D0
|
||||
c(2) = -1.0D0/64.0D0
|
||||
c(3) = 0.0D0
|
||||
else
|
||||
c---------------------------------------------------------------------
|
||||
c Coefficients for the S(b) smoother
|
||||
c---------------------------------------------------------------------
|
||||
c(0) = -3.0D0/17.0D0
|
||||
c(1) = +1.0D0/33.0D0
|
||||
c(2) = -1.0D0/61.0D0
|
||||
c(3) = 0.0D0
|
||||
endif
|
||||
lb = 1
|
||||
k = lt
|
||||
|
||||
c**********************************************************************
|
||||
c**********************************************************************
|
||||
c********************* START HERE *************************************
|
||||
c**********************************************************************
|
||||
c**********************************************************************
|
||||
|
||||
call setup(n1,n2,n3,k)
|
||||
call setupDVM(0)
|
||||
|
||||
call timer_start(T_init)
|
||||
|
||||
call zero3(u(pu),n1,n2,n3)
|
||||
call zran3(v(pv),n1,n2,n3,nx(lt),ny(lt),k,class)
|
||||
|
||||
write (*, 1001) nx(lt),ny(lt),nz(lt), Class
|
||||
write (*, 1002) nit
|
||||
write (*, *)
|
||||
|
||||
pdim = PROCESSORS_RANK()
|
||||
write (*, 310) pdim
|
||||
do pi=1, pdim
|
||||
write (*, 311) pi, PROCESSORS_SIZE(pi)
|
||||
enddo
|
||||
write (*, *) ' '
|
||||
|
||||
310 format(' Processors grid rank: ', i4)
|
||||
311 format(' Grid dimension [', i4, '] size: ', i4)
|
||||
|
||||
1000 format(//,' NAS Parallel Benchmarks (NPB3.3-DVMH)',
|
||||
> ' - MG Benchmark', /)
|
||||
1001 format(' Size: ', i4, 'x', i4, 'x', i4, ' (class ', A, ')' )
|
||||
1002 format(' Iterations: ', i5)
|
||||
|
||||
call resid(u(pu),v(pv),r(pr),n1,n2,n3,a,k)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c One iteration for startup
|
||||
c---------------------------------------------------------------------
|
||||
call mg3P(n1,n2,n3,k)
|
||||
call resid(u(pu),v(pv),r(pr),n1,n2,n3,a,k)
|
||||
call norm2u3(r(pr),n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt))
|
||||
call setup(n1,n2,n3,k)
|
||||
|
||||
call zero3(u(pu),n1,n2,n3)
|
||||
call zran3(v(pv),n1,n2,n3,nx(lt),ny(lt),k,class)
|
||||
|
||||
call timer_stop(T_init)
|
||||
tinit = timer_read(T_init)
|
||||
|
||||
write( *,'(A,F15.3,A/)' )
|
||||
> ' Initialization time: ',tinit, ' seconds'
|
||||
|
||||
do i = T_bench, T_last
|
||||
call timer_clear(i)
|
||||
end do
|
||||
|
||||
call timer_start(T_bench)
|
||||
call resid(u(pu),v(pv),r(pr),n1,n2,n3,a,k)
|
||||
call norm2u3(r(pr),n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt))
|
||||
|
||||
! временный вызов
|
||||
! call timer_start(T_bench)
|
||||
c*****************************************************************
|
||||
c********************MAIN LOOP ***********************************
|
||||
c*****************************************************************
|
||||
|
||||
do it = 1, nit
|
||||
if (it.eq.1 .or. it.eq.nit .or. mod(it,5).eq.0) then
|
||||
write(*,80) it
|
||||
80 format(' iter ',i3)
|
||||
endif
|
||||
call mg3P(n1,n2,n3,k)
|
||||
call resid(u(pu),v(pv),r(pr),n1,n2,n3,a,k)
|
||||
enddo
|
||||
|
||||
call norm2u3(r(pr),n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt))
|
||||
|
||||
call timer_stop(T_bench)
|
||||
|
||||
t = timer_read(T_bench)
|
||||
|
||||
verified = .FALSE.
|
||||
verify_value = 0.0
|
||||
|
||||
write(*,100)
|
||||
100 format(/' Benchmark completed ')
|
||||
|
||||
epsilon = 1.d-8
|
||||
if (Class .ne. 'U') then
|
||||
if(Class.eq.'S') then
|
||||
verify_value = 0.5307707005734d-04
|
||||
elseif(Class.eq.'W') then
|
||||
verify_value = 0.6467329375339d-05
|
||||
elseif(Class.eq.'A') then
|
||||
verify_value = 0.2433365309069d-05
|
||||
elseif(Class.eq.'B') then
|
||||
verify_value = 0.1800564401355d-05
|
||||
elseif(Class.eq.'C') then
|
||||
verify_value = 0.5706732285740d-06
|
||||
elseif(Class.eq.'D') then
|
||||
verify_value = 0.1583275060440d-09
|
||||
elseif(Class.eq.'E') then
|
||||
verify_value = 0.5630442584711d-10
|
||||
endif
|
||||
|
||||
err = abs( rnm2 - verify_value ) / verify_value
|
||||
if( err .le. epsilon ) then
|
||||
verified = .TRUE.
|
||||
write(*, 200)
|
||||
write(*, 201) rnm2
|
||||
write(*, 202) err
|
||||
200 format(' VERIFICATION SUCCESSFUL ')
|
||||
201 format(' L2 Norm is ', E20.13)
|
||||
202 format(' Error is ', E20.13)
|
||||
else
|
||||
verified = .FALSE.
|
||||
write(*, 300)
|
||||
write(*, 301) rnm2
|
||||
write(*, 302) verify_value
|
||||
300 format(' VERIFICATION FAILED')
|
||||
301 format(' L2 Norm is ', E20.13)
|
||||
302 format(' The correct L2 Norm is ', E20.13)
|
||||
endif
|
||||
else
|
||||
verified = .FALSE.
|
||||
write (*, 400)
|
||||
write (*, 401)
|
||||
write (*, 201) rnm2
|
||||
400 format(' Problem size unknown')
|
||||
401 format(' NO VERIFICATION PERFORMED')
|
||||
endif
|
||||
|
||||
nn = 1.0d0*nx(lt)*ny(lt)*nz(lt)
|
||||
|
||||
if( t .ne. 0. ) then
|
||||
mflops = 58.*nit*nn*1.0D-6 /t
|
||||
else
|
||||
mflops = 0.0
|
||||
endif
|
||||
|
||||
call print_results('MG', class, nx(lt), ny(lt), nz(lt),
|
||||
> nit, t,
|
||||
> mflops, ' floating point',
|
||||
> verified, npbversion, compiletime,
|
||||
> cs1, cs2, cs3, cs4, cs5, cs6, cs7)
|
||||
|
||||
|
||||
600 format( i4, 2e19.12)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c More timers
|
||||
c---------------------------------------------------------------------
|
||||
if (.not.timeron) goto 999
|
||||
|
||||
tmax = timer_read(t_bench)
|
||||
if (tmax .eq. 0.0) tmax = 1.0
|
||||
|
||||
write(*,800)
|
||||
800 format(' SECTION Time (secs)')
|
||||
do i=t_bench, t_last
|
||||
t = timer_read(i)
|
||||
if (i.eq.t_resid2) then
|
||||
t = timer_read(T_resid) - t
|
||||
write(*,820) 'mg-resid', t, t*100./tmax
|
||||
else
|
||||
write(*,810) t_names(i), t, t*100./tmax
|
||||
endif
|
||||
810 format(2x,a8,':',f9.3,' (',f6.2,'%)')
|
||||
820 format(' --> ',a8,':',f9.3,' (',f6.2,'%)')
|
||||
end do
|
||||
|
||||
999 continue
|
||||
|
||||
end
|
||||
@@ -0,0 +1,167 @@
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c @param integer d1, d2, d3 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param integer k - <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
subroutine VDownIteration(d1, d2, d3, k)
|
||||
|
||||
implicit none
|
||||
|
||||
include 'globals.h'
|
||||
include 'dvmvars.h'
|
||||
|
||||
integer k
|
||||
integer d1, d2, d3
|
||||
|
||||
! !DVM$ GET_ACTUAL(p_curr_r_k)
|
||||
!write (*,*) 'R(',k,') down: '
|
||||
!call printMatrix(r(p_curr_r_k), m1(k),m2(k),m3(k));
|
||||
!stop
|
||||
|
||||
call rprj3(
|
||||
> r(p_curr_r_k),m1(k),m2(k),m3(k),
|
||||
> r(p_curr_r_j),m1(k-1),m2(k-1),m3(k-1),
|
||||
> k, d1, d2, d3
|
||||
> )
|
||||
|
||||
! !DVM$ GET_ACTUAL(p_curr_r_j)
|
||||
!write (*,*) 'R(',k-1,') down: '
|
||||
!call printMatrix(r(p_curr_r_j), m1(k-1),m2(k-1),m3(k-1));
|
||||
!stop
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c @param integer k - <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
subroutine VUpIteration(k)
|
||||
|
||||
implicit none
|
||||
|
||||
include 'globals.h'
|
||||
include 'dvmvars.h'
|
||||
|
||||
integer k
|
||||
integer m1k, m2k, m3k
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c prolongate from level k-1 to k
|
||||
c---------------------------------------------------------------------
|
||||
call zero3(u(p_curr_u_k),m1(k),m2(k),m3(k))
|
||||
|
||||
call interp(
|
||||
> u(p_curr_u_j),m1(k-1),m2(k-1),m3(k-1),
|
||||
> u(p_curr_u_k),m1(k),m2(k),m3(k),
|
||||
> k
|
||||
> )
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c compute residual for level k
|
||||
c---------------------------------------------------------------------
|
||||
call resid(
|
||||
> u(p_curr_u_k),
|
||||
> r(p_curr_r_k),
|
||||
> r(p_curr_r_k),
|
||||
> m1(k),m2(k),m3(k),
|
||||
> a,k
|
||||
> )
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c apply smoother
|
||||
c---------------------------------------------------------------------
|
||||
call psinv(r(p_curr_r_k),u(p_curr_u_k),m1(k),m2(k),m3(k),c,k)
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
subroutine mg3P(n1,n2,n3,k)
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c multigrid V-cycle routine
|
||||
c---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
include 'globals.h'
|
||||
include 'dvmvars.h'
|
||||
|
||||
integer n1, n2, n3, k
|
||||
|
||||
integer j, d1, d2, d3
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c down cycle.
|
||||
c restrict the residual from the find grid to the coarse
|
||||
c---------------------------------------------------------------------
|
||||
!!DVM$ INTERVAL 1
|
||||
p_curr_r_j = pr
|
||||
|
||||
do k= lt, lb+1 , -1
|
||||
j = k-1
|
||||
|
||||
p_curr_r_k = p_curr_r_j
|
||||
p_curr_r_j = p_r_ir(j)
|
||||
|
||||
if(m1(k).eq.3)then; d1 = 2; else; d1 = 1; endif
|
||||
if(m2(k).eq.3)then; d2 = 2; else; d2 = 1; endif
|
||||
if(m3(k).eq.3)then; d3 = 2; else; d3 = 1; endif
|
||||
|
||||
call VDownIteration(d1,d2,d3,k)
|
||||
enddo
|
||||
!!DVM$ END INTERVAL
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c compute an approximate solution on the coarsest grid
|
||||
c---------------------------------------------------------------------
|
||||
!!DVM$ INTERVAL 5
|
||||
k = lb
|
||||
|
||||
p_curr_u_k = p_u_ir(k)
|
||||
p_curr_r_k = p_r_ir(k)
|
||||
|
||||
call zero3(u(p_curr_u_k),m1(k),m2(k),m3(k))
|
||||
call psinv(r(p_curr_r_k),u(p_curr_u_k),m1(k),m2(k),m3(k),c,k)
|
||||
!!DVM$ END INTERVAL
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c up cycle.
|
||||
c---------------------------------------------------------------------
|
||||
!!DVM$ INTERVAL 6
|
||||
do k = lb+1, lt-1
|
||||
j = k-1
|
||||
|
||||
p_curr_u_j = p_curr_u_k
|
||||
p_curr_u_k = p_u_ir(k)
|
||||
p_curr_r_k = p_r_ir(k)
|
||||
|
||||
call VUpIteration(k)
|
||||
enddo
|
||||
!!DVM$ END INTERVAL
|
||||
|
||||
200 continue
|
||||
|
||||
j = lt - 1
|
||||
k = lt
|
||||
|
||||
p_curr_u_j = p_u_ir(j)
|
||||
|
||||
call interp(u(p_curr_u_j),m1(j) ,m2(j) ,m3(j),
|
||||
> u(pu),m1(k),m2(k),m3(k),
|
||||
> k
|
||||
> )
|
||||
|
||||
call resid (u(pu),v(pv),r(pr),m1(k),m2(k),m3(k),a,k)
|
||||
|
||||
call psinv (r(pr),u(pu),m1(k),m2(k),m3(k),c,k)
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,51 @@
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
subroutine norm2u3(r,n1,n2,n3,rnm2,rnmu,nx,ny,nz)
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ INHERIT r
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c norm2u3 evaluates approximations to the L2 norm and the
|
||||
c uniform (or L-infinity or Chebyshev) norm, under the
|
||||
c assumption that the boundaries are periodic or zero. Add the
|
||||
c boundaries in with half weight (quarter weight on the edges
|
||||
c and eighth weight at the corners) for inhomogeneous boundaries.
|
||||
c---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
integer n1, n2, n3, nx, ny, nz
|
||||
double precision rnm2, rnmu, r(n1,n2,n3)
|
||||
double precision s, a
|
||||
integer i3, i2, i1
|
||||
|
||||
double precision dn
|
||||
|
||||
integer T_norm2
|
||||
parameter (T_norm2=9)
|
||||
|
||||
dn = 1.0d0*nx*ny*nz
|
||||
s=0.0D0
|
||||
rnmu = 0.0D0
|
||||
|
||||
|
||||
!DVM$ REGION
|
||||
!DVM$ PARALLEL (i3,i2,i1) ON r(i1,i2,i3),
|
||||
!DVM$& REDUCTION(SUM(s), MAX(rnmu)), PRIVATE(a)
|
||||
!DVM$& ,cuda_block(32,4,1)
|
||||
do i3=2,n3-1
|
||||
do i2=2,n2-1
|
||||
do i1=2,n1-1
|
||||
s=s+r(i1,i2,i3)**2
|
||||
a=abs(r(i1,i2,i3))
|
||||
rnmu=dmax1(rnmu,a)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ END REGION
|
||||
!!DVM$ END INTERVAL
|
||||
rnm2=sqrt( s / dn )
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,167 @@
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c u(h) = u(h) + C r(h)
|
||||
c <20><><EFBFBD> C - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>-<2D><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>,
|
||||
c r - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c h - <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>:
|
||||
c - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD> V-<2D><><EFBFBD><EFBFBD><EFBFBD>;
|
||||
c - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c @param double precission :: r(n1,n2,n3) ? r(h) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param double precission :: u(n1,n2,n3) ? u(H) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param double precission :: <20>(3) ? <20> - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param integer :: k - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
subroutine psinv( r,u,n1,n2,n3,c,k)
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ INHERIT r,u,c
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c psinv applies an approximate inverse as smoother: u = u + Cr
|
||||
c
|
||||
c This implementation costs 15A + 4M per result, where
|
||||
c A and M denote the costs of Addition and Multiplication.
|
||||
c Presuming coefficient c(3) is zero (the NPB assumes this,
|
||||
c but it is thus not a general case), 2A + 1M may be eliminated,
|
||||
c resulting in 13A + 3M.
|
||||
c Note that this vectorizes, and is also fine for cache
|
||||
c based machines.
|
||||
c---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
include 'globals.h'
|
||||
|
||||
integer n1,n2,n3,k
|
||||
double precision u(n1,n2,n3),r(n1,n2,n3),c(0:3)
|
||||
double precision c_0,c_1,c_2, r1,r1_m1,r1_p1, r2,r2_m1,r2_p1
|
||||
double precision r3,r3_m1,r3_p1, r4,r4_m1,r4_p1, r5,r5_m1,r5_p1
|
||||
integer i3, i2, i1
|
||||
|
||||
if(Class .eq. 'A' .or. Class .eq. 'S'.or. Class .eq.'W') then
|
||||
c_0 = -3.0D0/8.0D0
|
||||
c_1 = +1.0D0/32.0D0
|
||||
c_2 = -1.0D0/64.0D0
|
||||
else
|
||||
c_0 = -3.0D0/17.0D0
|
||||
c_1 = +1.0D0/33.0D0
|
||||
c_2 = -1.0D0/61.0D0
|
||||
endif
|
||||
|
||||
!DVM$ interval 2
|
||||
if( ver ) then
|
||||
!DVM$ REGION
|
||||
!DVM$ PARALLEL (i3,i2) ON u(*,i2,i3),cuda_block(32,6),
|
||||
!DVM$& SHADOW_RENEW(r(CORNER)),
|
||||
!DVM$& private(i1,r1,r1_m1,r1_p1,r2,r2_m1,r2_p1,r3,r3_m1,r3_p1,
|
||||
!DVM$& r4,r4_m1,r4_p1, r5,r5_m1,r5_p1)
|
||||
do i3=2,n3-1
|
||||
do i2=2,n2-1
|
||||
r1_m1=r(1,i2,i3)
|
||||
r1=r(2,i2,i3)
|
||||
r2_m1=r(1,i2-1,i3)
|
||||
r2=r(2,i2-1,i3)
|
||||
r3_m1=r(1,i2+1,i3)
|
||||
r3=r(2,i2+1,i3)
|
||||
r4_m1=r(1,i2,i3+1)
|
||||
r4=r(2,i2,i3+1)
|
||||
r5_m1=r(1,i2,i3-1)
|
||||
r5=r(2,i2,i3-1)
|
||||
do i1=2,n1-1
|
||||
r1_p1=r(i1+1,i2,i3)
|
||||
r2_p1=r(i1+1,i2-1,i3)
|
||||
r3_p1=r(i1+1,i2+1,i3)
|
||||
r4_p1=r(i1+1,i2,i3+1)
|
||||
r5_p1=r(i1+1,i2,i3-1)
|
||||
u(i1,i2,i3) = u(i1,i2,i3)
|
||||
& + c_0 * r1
|
||||
& + c_1 * ( r1_m1 + r1_p1 + r2 + r3 + r5 + r4)
|
||||
& + c_2 * ( r(i1,i2-1,i3-1) + r(i1,i2+1,i3-1)
|
||||
& + r(i1,i2-1,i3+1) + r(i1,i2+1,i3+1)
|
||||
& + r2_m1+r3_m1+r5_m1+r4_m1+r2_p1+r3_p1+r5_p1+r4_p1)
|
||||
r1_m1 = r1
|
||||
r1 = r1_p1
|
||||
r2_m1 = r2
|
||||
r2 = r2_p1
|
||||
r3_m1 = r3
|
||||
r3 = r3_p1
|
||||
r4_m1 = r4
|
||||
r4 = r4_p1
|
||||
r5_m1 = r5
|
||||
r5 = r5_p1
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ END REGION
|
||||
else
|
||||
!DVM$ REGION
|
||||
!DVM$ PARALLEL (i3,i1) ON u(i1,*,i3),cuda_block(32,6)
|
||||
!DVM$& ,private(i2,r1,r1_m1,r1_p1, r2,r2_m1,r2_p1, r3,r3_m1,r3_p1,
|
||||
!DVM$& r4,r4_m1,r4_p1, r5,r5_m1,r5_p1), SHADOW_RENEW(r(CORNER))
|
||||
do i3=2,n3-1
|
||||
do i1=2,n1-1
|
||||
r1_m1 = r(i1,1,i3)
|
||||
r1 = r(i1,2,i3)
|
||||
|
||||
r2_m1 = r(i1-1,1,i3)
|
||||
r2 = r(i1-1,2,i3)
|
||||
|
||||
r3_m1 = r(i1+1,1,i3)
|
||||
r3 = r(i1+1,2,i3)
|
||||
|
||||
r4_m1 = r(i1,1,i3+1)
|
||||
r4 = r(i1,2,i3+1)
|
||||
|
||||
r5_m1 = r(i1,1,i3-1)
|
||||
r5 = r(i1,2,i3-1)
|
||||
|
||||
do i2=2,n2-1
|
||||
r1_p1 = r(i1,i2+1,i3)
|
||||
r2_p1 = r(i1-1,i2+1,i3)
|
||||
r3_p1 = r(i1+1,i2+1,i3)
|
||||
r4_p1 = r(i1,i2+1,i3+1)
|
||||
r5_p1 = r(i1,i2+1,i3-1)
|
||||
u(i1,i2,i3) = u(i1,i2,i3)
|
||||
& + c_0 * r1
|
||||
& + c_1 * ( r2 + r3
|
||||
& + r1_m1 + r1_p1
|
||||
& + r4 + r5
|
||||
& )
|
||||
& + c_2 * (
|
||||
& r4_m1 + r4_p1
|
||||
& + r5_m1 + r5_p1
|
||||
& + r2_m1 + r2_p1
|
||||
& + r(i1-1,i2,i3-1) + r(i1-1,i2,i3+1)
|
||||
& + r3_m1 + r3_p1
|
||||
& + r(i1+1,i2,i3-1) + r(i1+1,i2,i3+1)
|
||||
& )
|
||||
r1_m1 = r1
|
||||
r1 = r1_p1
|
||||
|
||||
r2_m1 = r2
|
||||
r2 = r2_p1
|
||||
|
||||
r3_m1 = r3
|
||||
r3 = r3_p1
|
||||
|
||||
r4_m1 = r4
|
||||
r4 = r4_p1
|
||||
|
||||
r5_m1 = r5
|
||||
r5 = r5_p1
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ END REGION
|
||||
endif
|
||||
!DVM$ end interval
|
||||
c---------------------------------------------------------------------
|
||||
c exchange boundary points
|
||||
c---------------------------------------------------------------------
|
||||
call comm3(u,n1,n2,n3,k)
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,196 @@
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c r(h) = v - A u(h)
|
||||
c <20><><EFBFBD> A - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>-<2D><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>,
|
||||
c v - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> (<28><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> V-<2D><><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>)
|
||||
c h - <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c @param double precission :: r(n1,n2,n3) ? r(h) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param double precission :: v(n1,n2,n3) ? v - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param double precission :: a(3) ? A - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param integer :: k - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
subroutine resid( u,v,r,n1,n2,n3,a,k )
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ INHERIT r, u, a, v
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c resid computes the residual: r = v - Au
|
||||
c
|
||||
c This implementation costs 15A + 4M per result, where
|
||||
c A and M denote the costs of Addition (or Subtraction) and
|
||||
c Multiplication, respectively.
|
||||
c Presuming coefficient a(1) is zero (the NPB assumes this,
|
||||
c but it is thus not a general case), 3A + 1M may be eliminated,
|
||||
c resulting in 12A + 3M.
|
||||
c Note that this vectorizes, and is also fine for cache
|
||||
c based machines.
|
||||
c---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
include 'globals.h'
|
||||
|
||||
integer n1,n2,n3,k
|
||||
double precision u(n1,n2,n3),v(n1,n2,n3),r(n1,n2,n3),a(0:3)
|
||||
integer i3, i2, i1
|
||||
double precision u1_m1,u1,u1_p1, u2_m1,u2,u2_p1, u3_m1,u3,u3_p1
|
||||
double precision u4_m1,u4,u4_p1, u5_m1,u5,u5_p1, u6_m1,u6,u6_p1
|
||||
double precision u7_m1,u7,u7_p1, u8_m1,u8,u8_p1
|
||||
|
||||
!DVM$ interval 3
|
||||
if ( ver ) then
|
||||
|
||||
!DVM$ REGION
|
||||
!DVM$ PARALLEL (i3,i2) ON r(*,i2,i3), cuda_block(32,6)
|
||||
!DVM$& ,private(i1, u1_m1,u1,u1_p1, u2_m1,u2,u2_p1, u3_m1,u3,u3_p1,
|
||||
!DVM$& u4_m1,u4,u4_p1, u5_m1,u5,u5_p1, u6_m1,u6,u6_p1,
|
||||
!DVM$& u7_m1,u7,u7_p1, u8_m1,u8,u8_p1),SHADOW_RENEW(u(CORNER))
|
||||
do i3=2,n3-1
|
||||
do i2=2,n2-1
|
||||
u1_m1 = u(1,i2-1,i3-1)
|
||||
u1 = u(2,i2-1,i3-1)
|
||||
u2_m1 = u(1,i2+1,i3+1)
|
||||
u2 = u(2,i2+1,i3+1)
|
||||
u3_m1 = u(1,i2-1,i3+1)
|
||||
u3 = u(2,i2-1,i3+1)
|
||||
u4_m1 = u(1,i2+1,i3-1)
|
||||
u4 = u(2,i2+1,i3-1)
|
||||
u5_m1 = u(1,i2+1,i3)
|
||||
u5 = u(2,i2+1,i3)
|
||||
u6_m1 = u(1,i2-1,i3)
|
||||
u6 = u(2,i2-1,i3)
|
||||
u7_m1 = u(1,i2,i3-1)
|
||||
u7 = u(2,i2,i3-1)
|
||||
u8_m1 = u(1,i2,i3+1)
|
||||
u8 = u(2,i2,i3+1)
|
||||
do i1=2,n1-1
|
||||
u1_p1 = u(i1+1,i2-1,i3-1)
|
||||
u2_p1 = u(i1+1,i2+1,i3+1)
|
||||
u3_p1 = u(i1+1,i2-1,i3+1)
|
||||
u4_p1 = u(i1+1,i2+1,i3-1)
|
||||
u5_p1 = u(i1+1,i2+1,i3)
|
||||
u6_p1 = u(i1+1,i2-1,i3)
|
||||
u7_p1 = u(i1+1,i2,i3-1)
|
||||
u8_p1 = u(i1+1,i2,i3+1)
|
||||
r(i1,i2,i3) = v(i1,i2,i3)
|
||||
& + 8.0D0/3.0D0 * u(i1,i2,i3) - 1.0D0/6.0D0 *
|
||||
& (u1+u4+u3+u2+u6_m1+u5_m1+u7_m1+u8_m1+u6_p1+u5_p1+u7_p1+u8_p1)
|
||||
& -1.0D0/12.0D0*(u1_m1+u4_m1+u3_m1+u2_m1+u1_p1+u4_p1+u3_p1+u2_p1)
|
||||
u1_m1 = u1
|
||||
u1 = u1_p1
|
||||
u2_m1 = u2
|
||||
u2 = u2_p1
|
||||
u3_m1 = u3
|
||||
u3 = u3_p1
|
||||
u4_m1 = u4
|
||||
u4 = u4_p1
|
||||
u5_m1 = u5
|
||||
u5 = u5_p1
|
||||
u6_m1 = u6
|
||||
u6 = u6_p1
|
||||
u7_m1 = u7
|
||||
u7 = u7_p1
|
||||
u8_m1 = u8
|
||||
u8 = u8_p1
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ END REGION
|
||||
|
||||
else
|
||||
!DVM$ REGION
|
||||
!DVM$ PARALLEL (i3,i1) ON r(i1,*,i3), cuda_block(32,6)
|
||||
!DVM$& ,private(i2, u1_m1,u1,u1_p1, u2_m1,u2,u2_p1, u3_m1,u3,u3_p1,
|
||||
!DVM$& u4_m1,u4,u4_p1, u5_m1,u5,u5_p1, u6_m1,u6,u6_p1,
|
||||
!DVM$& u7_m1,u7,u7_p1, u8_m1,u8,u8_p1),SHADOW_RENEW(u(CORNER))
|
||||
do i3=2,n3-1
|
||||
do i1=2,n1-1
|
||||
u1_m1 = u(i1,1,i3-1)
|
||||
u1 = u(i1,2,i3-1)
|
||||
|
||||
u2_m1 = u(i1,1,i3+1)
|
||||
u2 = u(i1,2,i3+1)
|
||||
|
||||
u3_m1 = u(i1-1,1,i3)
|
||||
u3 = u(i1-1,2,i3)
|
||||
|
||||
u4_m1 = u(i1-1,1,i3-1)
|
||||
u4 = u(i1-1,2,i3-1)
|
||||
|
||||
u5_m1 = u(i1-1,1,i3+1)
|
||||
u5 = u(i1-1,2,i3+1)
|
||||
|
||||
u6_m1 = u(i1+1,1,i3)
|
||||
u6 = u(i1+1,2,i3)
|
||||
|
||||
u7_m1 = u(i1+1,1,i3-1)
|
||||
u7 = u(i1+1,2,i3-1)
|
||||
|
||||
u8_m1 = u(i1+1,1,i3+1)
|
||||
u8 = u(i1+1,2,i3+1)
|
||||
do i2=2,n2-1
|
||||
u1_p1 = u(i1,i2+1,i3-1)
|
||||
u2_p1 = u(i1,i2+1,i3+1)
|
||||
u3_p1 = u(i1-1,i2+1,i3)
|
||||
u4_p1 = u(i1-1,i2+1,i3-1)
|
||||
u5_p1 = u(i1-1,i2+1,i3+1)
|
||||
u6_p1 = u(i1+1,i2+1,i3)
|
||||
u7_p1 = u(i1+1,i2+1,i3-1)
|
||||
u8_p1 = u(i1+1,i2+1,i3+1)
|
||||
|
||||
r(i1,i2,i3) = v(i1,i2,i3)
|
||||
& + 8.0D0/3.0D0 * u(i1,i2,i3)
|
||||
& - 1.0D0/6.0D0 * (
|
||||
& u1_m1 + u1_p1
|
||||
& + u2_m1 + u2_p1
|
||||
& + u3_m1 + u3_p1
|
||||
& + u4 + u5
|
||||
& + u6_m1 + u6_p1
|
||||
& + u7 + u8
|
||||
& )
|
||||
& - 1.0D0/12.0D0 * (
|
||||
& u4_m1 + u4_p1
|
||||
& + u5_m1 + u5_p1
|
||||
& + u7_m1 + u7_p1
|
||||
& + u8_m1 + u8_p1
|
||||
& )
|
||||
u1_m1 = u1
|
||||
u1 = u1_p1
|
||||
|
||||
u2_m1 = u2
|
||||
u2 = u2_p1
|
||||
|
||||
u3_m1 = u3
|
||||
u3 = u3_p1
|
||||
|
||||
u4_m1 = u4
|
||||
u4 = u4_p1
|
||||
|
||||
u5_m1 = u5
|
||||
u5 = u5_p1
|
||||
|
||||
u6_m1 = u6
|
||||
u6 = u6_p1
|
||||
|
||||
u7_m1 = u7
|
||||
u7 = u7_p1
|
||||
|
||||
u8_m1 = u8
|
||||
u8 = u8_p1
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ END REGION
|
||||
endif
|
||||
!DVM$ end interval
|
||||
c---------------------------------------------------------------------
|
||||
c exchange boundary data
|
||||
c---------------------------------------------------------------------
|
||||
call comm3(r,n1,n2,n3,k)
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,169 @@
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> (<28><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>) <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c r(H) = P r(h)
|
||||
c <20><><EFBFBD> H = 2h - <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>,
|
||||
c P - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>,
|
||||
c r - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c @param double precission :: r(m1k,m2k,m3k) ? r(h) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param double precission :: s(m1j,m2j,m3j) ? r(H) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param double precission :: d1, d2, d3 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param integer :: k - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
subroutine rprj3( r,m1k,m2k,m3k,s,m1j,m2j,m3j,k, d1,d2,d3)
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ INHERIT r,s
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c rprj3 projects onto the next coarser grid,
|
||||
c using a trilinear Finite Element projection: s = r' = P r
|
||||
c
|
||||
c This implementation costs 20A + 4M per result, where
|
||||
c A and M denote the costs of Addition and Multiplication.
|
||||
c Note that this vectorizes, and is also fine for cache
|
||||
c based machines.
|
||||
c---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
include 'globals.h'
|
||||
|
||||
integer m1k, m2k, m3k, m1j, m2j, m3j,k
|
||||
double precision r(m1k,m2k,m3k), s(m1j,m2j,m3j)
|
||||
integer j3, j2, j1, i3, i2, i1, d1, d2, d3, j
|
||||
double precision r1_m1,r1_p1, r2_m1,r2_p1, r3_m1,r3_p1
|
||||
double precision r4_m1,r4_p1, r5_m1,r5_p1, r6_m1,r6_p1
|
||||
double precision r7_m1,r7_p1, r8_m1,r8_p1, r9_m1,r9_p1,y2,x2
|
||||
|
||||
!DVM$ interval 4
|
||||
if( ver ) then
|
||||
|
||||
!DVM$ REGION
|
||||
!DVM$ PARALLEL (j3,j2) ON s(*,j2,j3), SHADOW_RENEW(r(CORNER)),
|
||||
!DVM$& cuda_block(32,6), PRIVATE(i1, i2, i3, j1,
|
||||
!DVM$& r1_m1,r1_p1, r2_m1,r2_p1, r3_m1,r3_p1, r4_m1,r4_p1, r5_m1,r5_p1
|
||||
!DVM$&,r6_m1,r6_p1, r7_m1,r7_p1, r8_m1,r8_p1, r9_m1,r9_p1,y2,x2)
|
||||
do j3=2,m3j-1
|
||||
do j2=2,m2j-1
|
||||
i3 = 2*j3-d3
|
||||
i2 = 2*j2-d2
|
||||
|
||||
r1_m1 = r(2*2-d1-1,i2+1,i3)
|
||||
r2_m1 = r(2*2-d1-1,i2,i3+1)
|
||||
r3_m1 = r(2*2-d1-1,i2+1,i3+1)
|
||||
r4_m1 = r(2*2-d1-1,i2-1,i3)
|
||||
r5_m1 = r(2*2-d1-1,i2,i3-1)
|
||||
r6_m1 = r(2*2-d1-1,i2-1,i3-1)
|
||||
r7_m1 = r(2*2-d1-1,i2,i3)
|
||||
r8_m1 = r(2*2-d1-1,i2-1,i3+1)
|
||||
r9_m1 = r(2*2-d1-1,i2+1,i3-1)
|
||||
|
||||
do j1=2,m1j-1
|
||||
i1 = 2*j1-d1
|
||||
r1_p1 = r(i1+1,i2+1,i3)
|
||||
r2_p1 = r(i1+1,i2,i3+1)
|
||||
r3_p1 = r(i1+1,i2+1,i3+1)
|
||||
r4_p1 = r(i1+1,i2-1,i3)
|
||||
r5_p1 = r(i1+1,i2,i3-1)
|
||||
r6_p1 = r(i1+1,i2-1,i3-1)
|
||||
r7_p1 = r(i1+1,i2,i3)
|
||||
r8_p1 = r(i1+1,i2-1,i3+1)
|
||||
r9_p1 = r(i1+1,i2+1,i3-1)
|
||||
|
||||
y2 = r(i1, i2-1,i3-1) + r(i1, i2-1,i3+1)
|
||||
& + r(i1, i2+1,i3-1) + r(i1, i2+1,i3+1)
|
||||
x2 = r(i1, i2-1,i3 ) + r(i1, i2+1,i3 )
|
||||
& + r(i1, i2, i3-1) + r(i1, i2, i3+1)
|
||||
s(j1,j2,j3) =
|
||||
& 0.5D0 * r(i1,i2,i3)
|
||||
& + 0.25D0 * ( r7_m1 + r7_p1 + x2)
|
||||
& + 0.125D0 * ( r4_m1 + r1_m1 + r5_m1 + r2_m1 +
|
||||
& r4_p1 + r1_p1 + r5_p1 + r2_p1 + y2)
|
||||
& + 0.0625D0 * ( r6_m1 + r8_m1
|
||||
& + r9_m1 + r3_m1 + r6_p1 + r8_p1 + r9_p1 + r3_p1)
|
||||
r1_m1 = r1_p1
|
||||
r2_m1 = r2_p1
|
||||
r3_m1 = r3_p1
|
||||
r4_m1 = r4_p1
|
||||
r5_m1 = r5_p1
|
||||
r6_m1 = r6_p1
|
||||
r7_m1 = r7_p1
|
||||
r8_m1 = r8_p1
|
||||
r9_m1 = r9_p1
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ END REGION
|
||||
|
||||
else
|
||||
!DVM$ REGION
|
||||
!DVM$ PARALLEL (j3,j1) ON s(j1,*,j3), SHADOW_RENEW(r(CORNER)),
|
||||
!DVM$& cuda_block(32,6), PRIVATE(i1, i2, i3, j2,
|
||||
!DVM$& r1_m1,r1_p1, r2_m1,r2_p1, r3_m1,r3_p1, r4_m1,r4_p1, r5_m1,r5_p1
|
||||
!DVM$&,r6_m1,r6_p1, r7_m1,r7_p1, r8_m1,r8_p1, r9_m1,r9_p1)
|
||||
do j3=2,m3j-1
|
||||
do j1=2,m1j-1
|
||||
i3 = 2*j3-d3
|
||||
i1 = 2*j1-d1
|
||||
|
||||
r1_m1 = r(i1,2*2-d2-1,i3)
|
||||
r2_m1 = r(i1-1,2*2-d2-1,i3)
|
||||
r3_m1 = r(i1+1,2*2-d2-1,i3)
|
||||
r4_m1 = r(i1,2*2-d2-1,i3+1)
|
||||
r5_m1 = r(i1,2*2-d2-1,i3-1)
|
||||
r6_m1 = r(i1+1,2*2-d2-1,i3+1)
|
||||
r7_m1 = r(i1+1,2*2-d2-1,i3-1)
|
||||
r8_m1 = r(i1-1,2*2-d2-1,i3-1)
|
||||
r9_m1 = r(i1-1,2*2-d2-1,i3+1)
|
||||
do j2=2,m2j-1
|
||||
i2 = 2*j2-d2
|
||||
r1_p1 = r(i1,i2+1,i3)
|
||||
r2_p1 = r(i1-1,i2+1,i3)
|
||||
r3_p1 = r(i1+1,i2+1,i3)
|
||||
r4_p1 = r(i1,i2+1,i3+1)
|
||||
r5_p1 = r(i1,i2+1,i3-1)
|
||||
r6_p1 = r(i1+1,i2+1,i3+1)
|
||||
r7_p1 = r(i1+1,i2+1,i3-1)
|
||||
r8_p1 = r(i1-1,i2+1,i3-1)
|
||||
r9_p1 = r(i1-1,i2+1,i3+1)
|
||||
s(j1,j2,j3) =
|
||||
& 0.5D0 * r(i1,i2,i3)
|
||||
& + 0.25D0 * ( r1_m1 + r1_p1 +
|
||||
& r(i1-1,i2,i3) + r(i1+1,i2,i3)
|
||||
& + r(i1,i2,i3-1) + r(i1,i2,i3+1))
|
||||
& + 0.125D0 * (
|
||||
& r2_m1 + r2_p1 + r3_m1 + r3_p1
|
||||
& + r5_m1 + r4_m1
|
||||
& + r5_p1 + r4_p1
|
||||
& + r(i1-1,i2, i3-1) + r(i1-1,i2, i3+1)
|
||||
& + r(i1+1,i2, i3-1) + r(i1+1,i2, i3+1))
|
||||
& + 0.0625D0 * (
|
||||
& r8_m1 + r9_m1
|
||||
& + r8_p1 + r9_p1
|
||||
& + r7_m1 + r6_m1
|
||||
& + r7_p1 + r6_p1)
|
||||
|
||||
r1_m1 = r1_p1
|
||||
r2_m1 = r2_p1
|
||||
r3_m1 = r3_p1
|
||||
r4_m1 = r4_p1
|
||||
r5_m1 = r5_p1
|
||||
r6_m1 = r6_p1
|
||||
r7_m1 = r7_p1
|
||||
r8_m1 = r8_p1
|
||||
r9_m1 = r9_p1
|
||||
enddo
|
||||
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ END REGION
|
||||
endif
|
||||
!DVM$ end interval
|
||||
j = k-1
|
||||
call comm3(s,m1j,m2j,m3j,j)
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,226 @@
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param integer d1, d2, d3 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param integer k - <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
subroutine dvmAlignDownCycle(d1, d2, d3, k)
|
||||
|
||||
implicit none
|
||||
|
||||
include 'globals.h'
|
||||
include 'dvmvars.h'
|
||||
|
||||
integer k
|
||||
integer d1, d2, d3
|
||||
|
||||
integer t1, t2, t3
|
||||
|
||||
!DVM$ TEMPLATE EXT_V_DOWN (m1(k)+1, m2(k)+1, m3(k)+1)
|
||||
!DVM$ DISTRIBUTE EXT_V_DOWN (BLOCK, BLOCK, BLOCK)
|
||||
|
||||
t1 = d1 - 1; t2 = d2 - 1; t3 = d3 - 1;
|
||||
|
||||
!DVM$ REALIGN p_curr_r_j(i,j,k) WITH EXT_V_DOWN(2*i-1, 2*j-1, 2*k-1)
|
||||
!DVM$ REALIGN p_curr_r_k(i,j,k) WITH EXT_V_DOWN(i+t1, j+t2, k+t3)
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param integer d1, d2, d3 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param integer k - <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
subroutine dvmAlignUpCycle(k)
|
||||
|
||||
implicit none
|
||||
|
||||
include 'globals.h'
|
||||
include 'dvmvars.h'
|
||||
|
||||
integer k
|
||||
|
||||
!!DVM$ SHADOW p_curr_u_k(1:1,1:1,1:1)
|
||||
!!DVM$ SHADOW p_curr_u_j(1:1,1:1,1:1)
|
||||
|
||||
!DVM$ TEMPLATE EXT_V_UP (m1(k)+1, m2(k)+1, m3(k)+1)
|
||||
!DVM$ DISTRIBUTE EXT_V_UP (BLOCK, BLOCK, BLOCK)
|
||||
|
||||
!DVM$ REALIGN p_curr_u_j(i,j,k) WITH EXT_V_UP(2*i-1, 2*j-1, 2*k-1)
|
||||
!DVM$ REALIGN p_curr_u_k(i,j,k) WITH EXT_V_UP(i, j, k)
|
||||
!DVM$ REALIGN p_curr_r_k(i,j,k) WITH EXT_V_UP(i, j, k)
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> DVM
|
||||
c @param integer isSecond - 1 <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
subroutine setupDVM(isSecond)
|
||||
implicit none
|
||||
|
||||
include 'globals.h'
|
||||
include 'dvmvars.h'
|
||||
|
||||
integer allocate, k, j, isSecond
|
||||
integer pdim(3)
|
||||
integer d1, d2, d3
|
||||
|
||||
!DVM$ TEMPLATE EXT_BOTTOM (m1(lb), m2(lb), m3(lb))
|
||||
!DVM$ DISTRIBUTE EXT_BOTTOM (BLOCK, BLOCK, BLOCK)
|
||||
|
||||
!DVM$ TEMPLATE EXT_LAST (m1(lt)+1, m2(lt)+1, m3(lt)+1)
|
||||
!DVM$ DISTRIBUTE EXT_LAST (BLOCK, BLOCK, BLOCK)
|
||||
|
||||
if (isSecond .eq. 0) then
|
||||
! <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
do k = lt, 1, -1
|
||||
pdim =(/ m1(k), m2(k), m3(k) /)
|
||||
p_u_ir(k) = allocate(pdim, ir(k))
|
||||
p_r_ir(k) = allocate(pdim, ir(k))
|
||||
enddo
|
||||
|
||||
! <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
pdim =(/ m1(lt), m2(lt), m3(lt) /)
|
||||
pv = allocate(pdim, 1)
|
||||
|
||||
! <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
pu = p_u_ir(lt)
|
||||
pr = p_r_ir(lt)
|
||||
endif
|
||||
|
||||
! <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD>
|
||||
p_curr_r_j = pr
|
||||
do k= lt, lb+1 , -1
|
||||
j = k-1
|
||||
p_curr_r_k = p_curr_r_j
|
||||
p_curr_r_j = p_r_ir(j)
|
||||
|
||||
if(m1(k).eq.3)then; d1 = 2; else; d1 = 1; endif
|
||||
if(m2(k).eq.3)then; d2 = 2; else; d2 = 1; endif
|
||||
if(m3(k).eq.3)then; d3 = 2; else; d3 = 1; endif
|
||||
|
||||
call dvmAlignDownCycle(d1, d2, d3, k)
|
||||
enddo
|
||||
|
||||
! <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD>
|
||||
k = lb
|
||||
p_curr_u_k = p_u_ir(k)
|
||||
p_curr_r_k = p_r_ir(k)
|
||||
!DVM$ REALIGN (i, j, k) WITH EXT_BOTTOM(i,j,k) :: p_curr_u_k
|
||||
!DVM$ REALIGN (i, j, k) WITH EXT_BOTTOM(i,j,k) :: p_curr_r_k
|
||||
|
||||
! <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
do k = lb+1, lt-1
|
||||
j = k-1
|
||||
|
||||
p_curr_u_j = p_curr_u_k
|
||||
p_curr_u_k = p_u_ir(k)
|
||||
p_curr_r_k = p_r_ir(k)
|
||||
|
||||
call dvmAlignUpCycle(k)
|
||||
enddo
|
||||
|
||||
! <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
j = lt - 1
|
||||
k = lt
|
||||
|
||||
p_curr_u_j = p_u_ir(j)
|
||||
|
||||
!DVM$ REALIGN p_curr_u_j(i,j,k) WITH EXT_LAST(2*i-1, 2*j-1, 2*k-1)
|
||||
!DVM$ REALIGN pu(i,j,k) WITH EXT_LAST(i, j, k)
|
||||
!DVM$ REALIGN pr(i,j,k) WITH EXT_LAST(i, j, k)
|
||||
!DVM$ REALIGN pv(i,j,k) WITH EXT_LAST(i, j, k)
|
||||
|
||||
end subroutine setupDVM
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> DVM
|
||||
c---------------------------------------------------------------------
|
||||
function allocate(dims, disp)
|
||||
integer allocate
|
||||
allocate = disp
|
||||
return
|
||||
end function allocate
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
subroutine setup(n1,n2,n3,k)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
|
||||
include 'globals.h'
|
||||
|
||||
integer is1, is2, is3, ie1, ie2, ie3
|
||||
common /grid/ is1,is2,is3,ie1,ie2,ie3
|
||||
|
||||
integer n1,n2,n3,k
|
||||
integer j
|
||||
|
||||
integer ax, mi(3,maxlevel)
|
||||
integer ng(3,maxlevel)
|
||||
|
||||
ng(1,lt) = nx(lt)
|
||||
ng(2,lt) = ny(lt)
|
||||
ng(3,lt) = nz(lt)
|
||||
do ax=1,3
|
||||
do k=lt-1,1,-1
|
||||
ng(ax,k) = ng(ax,k+1)/2
|
||||
enddo
|
||||
enddo
|
||||
61 format(10i4)
|
||||
do k=lt,1,-1
|
||||
nx(k) = ng(1,k)
|
||||
ny(k) = ng(2,k)
|
||||
nz(k) = ng(3,k)
|
||||
enddo
|
||||
|
||||
do k = lt,1,-1
|
||||
do ax = 1,3
|
||||
mi(ax,k) = 2 + ng(ax,k)
|
||||
enddo
|
||||
|
||||
m1(k) = mi(1,k)
|
||||
m2(k) = mi(2,k)
|
||||
m3(k) = mi(3,k)
|
||||
|
||||
enddo
|
||||
|
||||
k = lt
|
||||
is1 = 2 + ng(1,k) - ng(1,lt)
|
||||
ie1 = 1 + ng(1,k)
|
||||
n1 = 3 + ie1 - is1
|
||||
is2 = 2 + ng(2,k) - ng(2,lt)
|
||||
ie2 = 1 + ng(2,k)
|
||||
n2 = 3 + ie2 - is2
|
||||
is3 = 2 + ng(3,k) - ng(3,lt)
|
||||
ie3 = 1 + ng(3,k)
|
||||
n3 = 3 + ie3 - is3
|
||||
|
||||
|
||||
ir(lt)=1
|
||||
do j = lt-1, 1, -1
|
||||
ir(j)=ir(j+1)+one*m1(j+1)*m2(j+1)*m3(j+1)
|
||||
enddo
|
||||
|
||||
if( debug_vec(1) .ge. 1 )then
|
||||
write(*,*)' in setup, '
|
||||
write(*,*)' k lt nx ny nz ',
|
||||
& ' n1 n2 n3 is1 is2 is3 ie1 ie2 ie3'
|
||||
write(*,9) k,lt,ng(1,k),ng(2,k),ng(3,k),
|
||||
& n1,n2,n3,is1,is2,is3,ie1,ie2,ie3
|
||||
9 format(15i4)
|
||||
endif
|
||||
|
||||
k = lt
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,415 @@
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c @param double precission :: z(n1 ,n2 ,n3) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
subroutine zero3(z,n1,n2,n3)
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ INHERIT z
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
|
||||
integer n1, n2, n3
|
||||
double precision z(n1,n2,n3)
|
||||
integer i1, i2, i3
|
||||
|
||||
!!DVM$ INTERVAL 3
|
||||
!DVM$ REGION
|
||||
!DVM$ PARALLEL (i3,i2,i1) ON z(i1,i2,i3), PRIVATE(i3,i2,i1)
|
||||
!DVM$&, cuda_block(32,6)
|
||||
do i3=1,n3
|
||||
do i2=1,n2
|
||||
do i1=1,n1
|
||||
z(i1,i2,i3)=0.0D0
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ END REGION
|
||||
!!DVM$ END INTERVAL
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
c----- end of program ------------------------------------------------
|
||||
|
||||
c -- DEBUG --------------------------------------------------------
|
||||
|
||||
subroutine printMatrix(a,n1,n2,n3)
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ INHERIT a
|
||||
integer n1,n2,n3,i1,i2,i3
|
||||
double precision a(n1,n2,n3), z(n2)
|
||||
integer m1, m2, m3
|
||||
|
||||
write(*,*) 'MATRIX ------------------'
|
||||
write(*,*) a
|
||||
write(*,*) ' '
|
||||
! m1 = min(n1,18)
|
||||
! m2 = min(n2,14)
|
||||
! m3 = min(n3,18)
|
||||
|
||||
! write(*,*)' '
|
||||
! do i3=1,m3
|
||||
! do i1=1,m1
|
||||
! do i2=1,m2
|
||||
! z(i2) = a(i1,i2,i3)
|
||||
! enddo
|
||||
! write(*,6)(z(i2),i2=1,m2)
|
||||
! enddo
|
||||
! write(*,*)' - - - - - - - '
|
||||
! enddo
|
||||
! write(*,*)' '
|
||||
! 6 format(15f6.3)
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
subroutine printMatrixNN(a,n1,n2,n3)
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ INHERIT a
|
||||
integer n1, n2, n3
|
||||
double precision a(n1,n2,n3), z
|
||||
|
||||
do i3=1,n3
|
||||
do i2=1,n2
|
||||
do i1=1,n1
|
||||
z = a(i1,i2,i3)
|
||||
if (z.ne.0) then
|
||||
write(*,*) '(',i1,',',i2,',',i3,')=',z
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
end
|
||||
|
||||
|
||||
c -- EXTERNAL --------------------------------------------------------
|
||||
|
||||
subroutine timer_clear(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
integer n
|
||||
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
|
||||
elapsed(n) = 0.0
|
||||
return
|
||||
end
|
||||
|
||||
subroutine timer_start(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
external elapsed_time
|
||||
double precision elapsed_time
|
||||
integer n
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
|
||||
start(n) = elapsed_time()
|
||||
return
|
||||
end
|
||||
|
||||
subroutine timer_stop(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
external elapsed_time
|
||||
double precision elapsed_time
|
||||
integer n
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
double precision t, now
|
||||
|
||||
now = elapsed_time()
|
||||
t = now - start(n)
|
||||
elapsed(n) = elapsed(n) + t
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
double precision function timer_read(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
integer n
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
|
||||
timer_read = elapsed(n)
|
||||
return
|
||||
end
|
||||
|
||||
double precision function elapsed_time()
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
implicit none
|
||||
|
||||
double precision t
|
||||
double precision dvtime
|
||||
|
||||
data t/0.d0/
|
||||
c This function must measure wall clock time, not CPU time.
|
||||
c Since there is no portable timer in Fortran (77)
|
||||
c we call a routine compiled in C (though the C source may have
|
||||
c to be tweaked).
|
||||
t = dvtime()
|
||||
c The following is not ok for "official" results because it reports
|
||||
c CPU time not wall clock time. It may be useful for developing/testing
|
||||
c on timeshared Crays, though.
|
||||
c call second(t)
|
||||
|
||||
elapsed_time = t
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
subroutine print_results(name, class, n1, n2, n3, niter,
|
||||
> t, mops, optype, verified, npbversion)
|
||||
c ,compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7)
|
||||
|
||||
implicit none
|
||||
character*2 name
|
||||
character*1 class
|
||||
integer n1, n2, n3, niter, j
|
||||
double precision t, mops
|
||||
character optype*24, size*13
|
||||
logical verified
|
||||
character*5 npbversion
|
||||
c > , compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7
|
||||
|
||||
write (*, 2) name
|
||||
2 format(//, ' ', A2, ' Benchmark Completed.')
|
||||
|
||||
write (*, 3) Class
|
||||
3 format(' Class = ', 12x, a12)
|
||||
|
||||
c If this is not a grid-based problem (EP, FT, CG), then
|
||||
c we only print n1, which contains some measure of the
|
||||
c problem size. In that case, n2 and n3 are both zero.
|
||||
c Otherwise, we print the grid size n1xn2xn3
|
||||
|
||||
if ((n2 .eq. 0) .and. (n3 .eq. 0)) then
|
||||
if (name(1:2) .eq. 'EP') then
|
||||
write(size, '(f12.0)' ) 2.d0**n1
|
||||
do j =13,1,-1
|
||||
if (size(j:j) .eq. '.') size(j:j) = ' '
|
||||
end do
|
||||
write (*,42) size
|
||||
42 format(' Size = ',12x, a14)
|
||||
else
|
||||
write (*,44) n1
|
||||
44 format(' Size = ',12x, i12)
|
||||
endif
|
||||
else
|
||||
write (*, 4) n1,n2,n3
|
||||
4 format(' Size = ',12x, i3,'x',i3,'x',i3)
|
||||
endif
|
||||
|
||||
write (*, 5) niter
|
||||
5 format(' Iterations = ', 12x, i12)
|
||||
|
||||
write (*, 6) t
|
||||
6 format(' Time in seconds = ',12x, f12.2)
|
||||
|
||||
write (*,9) mops
|
||||
9 format(' Mop/s total = ',12x, f12.2)
|
||||
|
||||
write(*, 11) optype
|
||||
11 format(' Operation type = ', a24)
|
||||
|
||||
if (verified) then
|
||||
write(*,12) ' SUCCESSFUL'
|
||||
else
|
||||
write(*,12) 'UNSUCCESSFUL'
|
||||
endif
|
||||
12 format(' Verification = ', 12x, a)
|
||||
|
||||
write(*,13) npbversion
|
||||
13 format(' Version = ', 12x, a12)
|
||||
|
||||
c write(*,14) compiletime
|
||||
c 14 format(' Compile date = ', 12x, a12)
|
||||
|
||||
|
||||
c write (*,121) cs1
|
||||
c 121 format(/, ' Compile options:', /,
|
||||
c > ' F77 = ', A)
|
||||
|
||||
c write (*,122) cs2
|
||||
c 122 format(' FLINK = ', A)
|
||||
|
||||
c write (*,123) cs3
|
||||
c 123 format(' F_LIB = ', A)
|
||||
c
|
||||
c write (*,124) cs4
|
||||
c 124 format(' F_INC = ', A)
|
||||
c
|
||||
c write (*,125) cs5
|
||||
c 125 format(' FFLAGS = ', A)
|
||||
c
|
||||
c write (*,126) cs6
|
||||
c 126 format(' FLINKFLAGS = ', A)
|
||||
c
|
||||
c write(*, 127) cs7
|
||||
c 127 format(' RAND = ', A)
|
||||
|
||||
write (*,130)
|
||||
130 format(//' Please send the results of this run to:'//
|
||||
> ' NPB Development Team '/
|
||||
> ' Internet: npb@nas.nasa.gov'/
|
||||
> ' '/
|
||||
> ' If email is not available, send this to:'//
|
||||
> ' MS T27A-1'/
|
||||
> ' NASA Ames Research Center'/
|
||||
> ' Moffett Field, CA 94035-1000'//
|
||||
> ' Fax: 415-604-3957'//)
|
||||
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
double precision function randlc (x, a)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c
|
||||
c This routine returns a uniform pseudorandom double precision number in the
|
||||
c range (0, 1) by using the linear congruential generator
|
||||
c
|
||||
c x_{k+1} = a x_k (mod 2^46)
|
||||
c
|
||||
c where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers
|
||||
c before repeating. The argument A is the same as 'a' in the above formula,
|
||||
c and X is the same as x_0. A and X must be odd double precision integers
|
||||
c in the range (1, 2^46). The returned value RANDLC is normalized to be
|
||||
c between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain
|
||||
c the new seed x_1, so that subsequent calls to RANDLC using the same
|
||||
c arguments will generate a continuous sequence.
|
||||
c
|
||||
c This routine should produce the same results on any computer with at least
|
||||
c 48 mantissa bits in double precision floating point data. On 64 bit
|
||||
c systems, double precision should be disabled.
|
||||
c
|
||||
c David H. Bailey October 26, 1990
|
||||
c
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
|
||||
double precision r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z
|
||||
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
|
||||
> t46 = t23 ** 2)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Break A into two parts such that A = 2^23 * A1 + A2.
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
t1 = r23 * a
|
||||
a1 = int (t1)
|
||||
a2 = a - t23 * a1
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Break X into two parts such that X = 2^23 * X1 + X2, compute
|
||||
c Z = A1 * X2 + A2 * X1 (mod 2^23), and then
|
||||
c X = 2^23 * Z + A2 * X2 (mod 2^46).
|
||||
c---------------------------------------------------------------------
|
||||
t1 = r23 * x
|
||||
x1 = int (t1)
|
||||
x2 = x - t23 * x1
|
||||
t1 = a1 * x2 + a2 * x1
|
||||
t2 = int (r23 * t1)
|
||||
z = t1 - t23 * t2
|
||||
t3 = t23 * z + a2 * x2
|
||||
t4 = int (r46 * t3)
|
||||
x = t3 - t46 * t4
|
||||
randlc = r46 * x
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
subroutine vranlc (n, x, a, y)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c
|
||||
c This routine generates N uniform pseudorandom double precision numbers in
|
||||
c the range (0, 1) by using the linear congruential generator
|
||||
c
|
||||
c x_{k+1} = a x_k (mod 2^46)
|
||||
c
|
||||
c where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers
|
||||
c before repeating. The argument A is the same as 'a' in the above formula,
|
||||
c and X is the same as x_0. A and X must be odd double precision integers
|
||||
c in the range (1, 2^46). The N results are placed in Y and are normalized
|
||||
c to be between 0 and 1. X is updated to contain the new seed, so that
|
||||
c subsequent calls to VRANLC using the same arguments will generate a
|
||||
c continuous sequence. If N is zero, only initialization is performed, and
|
||||
c the variables X, A and Y are ignored.
|
||||
c
|
||||
c This routine is the standard version designed for scalar or RISC systems.
|
||||
c However, it should produce the same results on any single processor
|
||||
c computer with at least 48 mantissa bits in double precision floating point
|
||||
c data. On 64 bit systems, double precision should be disabled.
|
||||
c
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
|
||||
integer i,n
|
||||
double precision y,r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z
|
||||
dimension y(*)
|
||||
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
|
||||
> t46 = t23 ** 2)
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Break A into two parts such that A = 2^23 * A1 + A2.
|
||||
c---------------------------------------------------------------------
|
||||
t1 = r23 * a
|
||||
a1 = int (t1)
|
||||
a2 = a - t23 * a1
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Generate N results. This loop is not vectorizable.
|
||||
c---------------------------------------------------------------------
|
||||
do i = 1, n
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Break X into two parts such that X = 2^23 * X1 + X2, compute
|
||||
c Z = A1 * X2 + A2 * X1 (mod 2^23), and then
|
||||
c X = 2^23 * Z + A2 * X2 (mod 2^46).
|
||||
c---------------------------------------------------------------------
|
||||
t1 = r23 * x
|
||||
x1 = int (t1)
|
||||
x2 = x - t23 * x1
|
||||
t1 = a1 * x2 + a2 * x1
|
||||
t2 = int (r23 * t1)
|
||||
z = t1 - t23 * t2
|
||||
t3 = t23 * z + a2 * x2
|
||||
t4 = int (r46 * t3)
|
||||
x = t3 - t46 * t4
|
||||
y(i) = r46 * x
|
||||
enddo
|
||||
return
|
||||
end
|
||||
c---------------------------------------------------------------------
|
||||
@@ -0,0 +1,431 @@
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>-<2D><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
c @param double precission :: z(n1 ,n2 ,n3) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c @param integer :: k - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
|
||||
c---------------------------------------------------------------------
|
||||
subroutine zran3(z,n1,n2,n3,nx,ny,k,class)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c zran3 loads +1 at ten randomly chosen points,
|
||||
c loads -1 at a different ten random points,
|
||||
c and zero elsewhere.
|
||||
c---------------------------------------------------------------------
|
||||
implicit none
|
||||
include 'npbparams.h'
|
||||
!DVM$ INHERIT z
|
||||
!DVM$ DYNAMIC z
|
||||
integer is1, is2, is3, ie1, ie2, ie3,i3b,i3e,i2b,i1b
|
||||
common /grid/ is1,is2,is3,ie1,ie2,ie3
|
||||
|
||||
integer n1, n2, n3, k, nx, ny, ierr, i0, m0, m1
|
||||
double precision z(n1,n2,n3),zz(2+2**ndim1)
|
||||
|
||||
character*1 class
|
||||
integer mm, i1, i2, i3, d1, e1, e2, e3,ii2,ii1
|
||||
double precision x, a, max_val, min_val
|
||||
double precision xx, x0, x1, a1, a2, ai
|
||||
parameter( mm = 10, a = 5.D0 ** 13, x = 314159265.D0)
|
||||
double precision ten( mm, 0:1 ), temp, best
|
||||
integer i, j1( mm, 0:1 ), j2( mm, 0:1 ), j3( mm, 0:1 )
|
||||
integer jg( 0:3, mm, 0:1 ), jg_temp(4)
|
||||
integer id1,id2,id3,idx1,idx2,idx3,nj,x22
|
||||
|
||||
call zero3(z,n1,n2,n3)
|
||||
|
||||
if(Class .eq. 'S') then
|
||||
j1( 10,1)= 9
|
||||
j1( 10,0)= 2
|
||||
j2( 10,1)= 3
|
||||
j2( 10,0)= 13
|
||||
j3( 10,1)= 22
|
||||
j3( 10,0)= 4
|
||||
ten( 10,1)= 0.9999958165E+00
|
||||
ten( 10,0)= 0.1621806298E-04
|
||||
j1( 9,1)= 21
|
||||
j1( 9,0)= 15
|
||||
j2( 9,1)= 31
|
||||
j2( 9,0)= 10
|
||||
j3( 9,1)= 33
|
||||
j3( 9,0)= 19
|
||||
ten( 9,1)= 0.9999389618E+00
|
||||
ten( 9,0)= 0.7495597642E-04
|
||||
j1( 8,1)= 4
|
||||
j1( 8,0)= 7
|
||||
j2( 8,1)= 2
|
||||
j2( 8,0)= 16
|
||||
j3( 8,1)= 5
|
||||
j3( 8,0)= 2
|
||||
ten( 8,1)= 0.9999174510E+00
|
||||
ten( 8,0)= 0.1889568795E-03
|
||||
j1( 7,1)= 6
|
||||
j1( 7,0)= 6
|
||||
j2( 7,1)= 24
|
||||
j2( 7,0)= 30
|
||||
j3( 7,1)= 5
|
||||
j3( 7,0)= 17
|
||||
ten( 7,1)= 0.9998666211E+00
|
||||
ten( 7,0)= 0.1958622020E-03
|
||||
j1( 6,1)= 3
|
||||
j1( 6,0)= 14
|
||||
j2( 6,1)= 18
|
||||
j2( 6,0)= 4
|
||||
j3( 6,1)= 23
|
||||
j3( 6,0)= 3
|
||||
ten( 6,1)= 0.9998273669E+00
|
||||
ten( 6,0)= 0.2522906835E-03
|
||||
j1( 5,1)= 23
|
||||
j1( 5,0)= 7
|
||||
j2( 5,1)= 33
|
||||
j2( 5,0)= 19
|
||||
j3( 5,1)= 8
|
||||
j3( 5,0)= 10
|
||||
ten( 5,1)= 0.9997817402E+00
|
||||
ten( 5,0)= 0.2966875037E-03
|
||||
j1( 4,1)= 14
|
||||
j1( 4,0)= 22
|
||||
j2( 4,1)= 17
|
||||
j2( 4,0)= 21
|
||||
j3( 4,1)= 14
|
||||
j3( 4,0)= 13
|
||||
ten( 4,1)= 0.9997789044E+00
|
||||
ten( 4,0)= 0.3082809722E-03
|
||||
j1( 3,1)= 32
|
||||
j1( 3,0)= 28
|
||||
j2( 3,1)= 6
|
||||
j2( 3,0)= 17
|
||||
j3( 3,1)= 27
|
||||
j3( 3,0)= 33
|
||||
ten( 3,1)= 0.9997405518E+00
|
||||
ten( 3,0)= 0.3944731504E-03
|
||||
j1( 2,1)= 30
|
||||
j1( 2,0)= 10
|
||||
j2( 2,1)= 2
|
||||
j2( 2,0)= 27
|
||||
j3( 2,1)= 30
|
||||
j3( 2,0)= 24
|
||||
ten( 2,1)= 0.9997394292E+00
|
||||
ten( 2,0)= 0.4423527428E-03
|
||||
j1( 1,1)= 19
|
||||
j1( 1,0)= 9
|
||||
j2( 1,1)= 28
|
||||
j2( 1,0)= 16
|
||||
j3( 1,1)= 19
|
||||
j3( 1,0)= 28
|
||||
ten( 1,1)= 0.9996874580E+00
|
||||
ten( 1,0)= 0.4726676489E-03
|
||||
else if(class .eq. 'W') then
|
||||
j1( 10,1)= 115
|
||||
j1( 10,0)= 17
|
||||
j2( 10,1)= 87
|
||||
j2( 10,0)= 24
|
||||
j3( 10,1)= 52
|
||||
j3( 10,0)= 125
|
||||
ten( 10,1)= 0.9999999670E+00
|
||||
ten( 10,0)= 0.4097578454E-06
|
||||
j1( 9,1)= 129
|
||||
j1( 9,0)= 104
|
||||
j2( 9,1)= 47
|
||||
j2( 9,0)= 119
|
||||
j3( 9,1)= 34
|
||||
j3( 9,0)= 61
|
||||
ten( 9,1)= 0.9999996061E+00
|
||||
ten( 9,0)= 0.9408003763E-06
|
||||
j1( 8,1)= 16
|
||||
j1( 8,0)= 16
|
||||
j2( 8,1)= 112
|
||||
j2( 8,0)= 123
|
||||
j3( 8,1)= 120
|
||||
j3( 8,0)= 77
|
||||
ten( 8,1)= 0.9999987379E+00
|
||||
ten( 8,0)= 0.1220169409E-05
|
||||
j1( 7,1)= 36
|
||||
j1( 7,0)= 111
|
||||
j2( 7,1)= 23
|
||||
j2( 7,0)= 89
|
||||
j3( 7,1)= 102
|
||||
j3( 7,0)= 123
|
||||
ten( 7,1)= 0.9999973226E+00
|
||||
ten( 7,0)= 0.1432884929E-05
|
||||
j1( 6,1)= 31
|
||||
j1( 6,0)= 11
|
||||
j2( 6,1)= 19
|
||||
j2( 6,0)= 3
|
||||
j3( 6,1)= 111
|
||||
j3( 6,0)= 23
|
||||
ten( 6,1)= 0.9999970764E+00
|
||||
ten( 6,0)= 0.1917141063E-05
|
||||
j1( 5,1)= 29
|
||||
j1( 5,0)= 97
|
||||
j2( 5,1)= 50
|
||||
j2( 5,0)= 36
|
||||
j3( 5,1)= 13
|
||||
j3( 5,0)= 56
|
||||
ten( 5,1)= 0.9999968171E+00
|
||||
ten( 5,0)= 0.2780729588E-05
|
||||
j1( 4,1)= 82
|
||||
j1( 4,0)= 40
|
||||
j2( 4,1)= 92
|
||||
j2( 4,0)= 128
|
||||
j3( 4,1)= 22
|
||||
j3( 4,0)= 14
|
||||
ten( 4,1)= 0.9999964096E+00
|
||||
ten( 4,0)= 0.3077687282E-05
|
||||
j1( 3,1)= 28
|
||||
j1( 3,0)= 94
|
||||
j2( 3,1)= 86
|
||||
j2( 3,0)= 85
|
||||
j3( 3,1)= 75
|
||||
j3( 3,0)= 37
|
||||
ten( 3,1)= 0.9999960890E+00
|
||||
ten( 3,0)= 0.3419091698E-05
|
||||
j1( 2,1)= 41
|
||||
j1( 2,0)= 72
|
||||
j2( 2,1)= 34
|
||||
j2( 2,0)= 4
|
||||
j3( 2,1)= 3
|
||||
j3( 2,0)= 66
|
||||
ten( 2,1)= 0.9999958165E+00
|
||||
ten( 2,0)= 0.3899679498E-05
|
||||
j1( 1,1)= 117
|
||||
j1( 1,0)= 116
|
||||
j2( 1,1)= 88
|
||||
j2( 1,0)= 105
|
||||
j3( 1,1)= 22
|
||||
j3( 1,0)= 7
|
||||
ten( 1,1)= 0.9999953932E+00
|
||||
ten( 1,0)= 0.4564590384E-05
|
||||
else if(class .eq. 'A' .or. class .eq. 'B') then
|
||||
j1( 10,1)= 54
|
||||
j1( 10,0)= 223
|
||||
j2( 10,1)= 209
|
||||
j2( 10,0)= 42
|
||||
j3( 10,1)= 40
|
||||
j3( 10,0)= 240
|
||||
ten( 10,1)= 0.9999999811E+00
|
||||
ten( 10,0)= 0.1058528198E-07
|
||||
j1( 9,1)= 243
|
||||
j1( 9,0)= 154
|
||||
j2( 9,1)= 172
|
||||
j2( 9,0)= 162
|
||||
j3( 9,1)= 14
|
||||
j3( 9,0)= 36
|
||||
ten( 9,1)= 0.9999999670E+00
|
||||
ten( 9,0)= 0.6491002580E-07
|
||||
j1( 8,1)= 203
|
||||
j1( 8,0)= 82
|
||||
j2( 8,1)= 18
|
||||
j2( 8,0)= 184
|
||||
j3( 8,1)= 198
|
||||
j3( 8,0)= 255
|
||||
ten( 8,1)= 0.9999999092E+00
|
||||
ten( 8,0)= 0.1261776816E-06
|
||||
j1( 7,1)= 202
|
||||
j1( 7,0)= 250
|
||||
j2( 7,1)= 83
|
||||
j2( 7,0)= 170
|
||||
j3( 7,1)= 209
|
||||
j3( 7,0)= 157
|
||||
ten( 7,1)= 0.9999999006E+00
|
||||
ten( 7,0)= 0.2087648028E-06
|
||||
j1( 6,1)= 115
|
||||
j1( 6,0)= 199
|
||||
j2( 6,1)= 123
|
||||
j2( 6,0)= 7
|
||||
j3( 6,1)= 207
|
||||
j3( 6,0)= 203
|
||||
ten( 6,1)= 0.9999998605E+00
|
||||
ten( 6,0)= 0.3218575699E-06
|
||||
j1( 5,1)= 212
|
||||
j1( 5,0)= 92
|
||||
j2( 5,1)= 7
|
||||
j2( 5,0)= 63
|
||||
j3( 5,1)= 248
|
||||
j3( 5,0)= 205
|
||||
ten( 5,1)= 0.9999998070E+00
|
||||
ten( 5,0)= 0.3231413785E-06
|
||||
j1( 4,1)= 45
|
||||
j1( 4,0)= 17
|
||||
j2( 4,1)= 194
|
||||
j2( 4,0)= 205
|
||||
j3( 4,1)= 234
|
||||
j3( 4,0)= 32
|
||||
ten( 4,1)= 0.9999997641E+00
|
||||
ten( 4,0)= 0.4097578454E-06
|
||||
j1( 3,1)= 176
|
||||
j1( 3,0)= 101
|
||||
j2( 3,1)= 246
|
||||
j2( 3,0)= 156
|
||||
j3( 3,1)= 164
|
||||
j3( 3,0)= 59
|
||||
ten( 3,1)= 0.9999997464E+00
|
||||
ten( 3,0)= 0.4272763050E-06
|
||||
j1( 2,1)= 5
|
||||
j1( 2,0)= 102
|
||||
j2( 2,1)= 118
|
||||
j2( 2,0)= 138
|
||||
j3( 2,1)= 175
|
||||
j3( 2,0)= 112
|
||||
ten( 2,1)= 0.9999997340E+00
|
||||
ten( 2,0)= 0.4331109977E-06
|
||||
j1( 1,1)= 57
|
||||
j1( 1,0)= 211
|
||||
j2( 1,1)= 120
|
||||
j2( 1,0)= 154
|
||||
j3( 1,1)= 167
|
||||
j3( 1,0)= 98
|
||||
ten( 1,1)= 0.9999996868E+00
|
||||
ten( 1,0)= 0.4353645551E-06
|
||||
else if(class .eq. 'C') then
|
||||
j1( 10,1)= 310
|
||||
j1( 10,0)= 399
|
||||
j2( 10,1)= 361
|
||||
j2( 10,0)= 312
|
||||
j3( 10,1)= 11
|
||||
j3( 10,0)= 200
|
||||
ten( 10,1)= 0.9999999811E+00
|
||||
ten( 10,0)= 0.6358860105E-08
|
||||
j1( 9,1)= 11
|
||||
j1( 9,0)= 96
|
||||
j2( 9,1)= 493
|
||||
j2( 9,0)= 401
|
||||
j3( 9,1)= 118
|
||||
j3( 9,0)= 238
|
||||
ten( 9,1)= 0.9999999808E+00
|
||||
ten( 9,0)= 0.7946667324E-08
|
||||
j1( 8,1)= 451
|
||||
j1( 8,0)= 223
|
||||
j2( 8,1)= 270
|
||||
j2( 8,0)= 278
|
||||
j3( 8,1)= 443
|
||||
j3( 8,0)= 61
|
||||
ten( 8,1)= 0.9999999778E+00
|
||||
ten( 8,0)= 0.1058528198E-07
|
||||
j1( 7,1)= 149
|
||||
j1( 7,0)= 344
|
||||
j2( 7,1)= 117
|
||||
j2( 7,0)= 139
|
||||
j3( 7,1)= 199
|
||||
j3( 7,0)= 168
|
||||
ten( 7,1)= 0.9999999700E+00
|
||||
ten( 7,0)= 0.2456904724E-07
|
||||
j1( 6,1)= 243
|
||||
j1( 6,0)= 383
|
||||
j2( 6,1)= 87
|
||||
j2( 6,0)= 74
|
||||
j3( 6,1)= 5
|
||||
j3( 6,0)= 283
|
||||
ten( 6,1)= 0.9999999670E+00
|
||||
ten( 6,0)= 0.2954460854E-07
|
||||
j1( 5,1)= 509
|
||||
j1( 5,0)= 352
|
||||
j2( 5,1)= 43
|
||||
j2( 5,0)= 194
|
||||
j3( 5,1)= 127
|
||||
j3( 5,0)= 418
|
||||
ten( 5,1)= 0.9999999666E+00
|
||||
ten( 5,0)= 0.4643648310E-07
|
||||
j1( 4,1)= 163
|
||||
j1( 4,0)= 18
|
||||
j2( 4,1)= 280
|
||||
j2( 4,0)= 21
|
||||
j3( 4,1)= 75
|
||||
j3( 4,0)= 457
|
||||
ten( 4,1)= 0.9999999358E+00
|
||||
ten( 4,0)= 0.4987107616E-07
|
||||
j1( 3,1)= 146
|
||||
j1( 3,0)= 154
|
||||
j2( 3,1)= 93
|
||||
j2( 3,0)= 338
|
||||
j3( 3,1)= 312
|
||||
j3( 3,0)= 10
|
||||
ten( 3,1)= 0.9999999149E+00
|
||||
ten( 3,0)= 0.6491002580E-07
|
||||
j1( 2,1)= 203
|
||||
j1( 2,0)= 402
|
||||
j2( 2,1)= 10
|
||||
j2( 2,0)= 504
|
||||
j3( 2,1)= 51
|
||||
j3( 2,0)= 449
|
||||
ten( 2,1)= 0.9999999092E+00
|
||||
ten( 2,0)= 0.6990178747E-07
|
||||
j1( 1,1)= 151
|
||||
j1( 1,0)= 74
|
||||
j2( 1,1)= 401
|
||||
j2( 1,0)= 2
|
||||
j3( 1,1)= 331
|
||||
j3( 1,0)= 107
|
||||
ten( 1,1)= 0.9999999069E+00
|
||||
ten( 1,0)= 0.8774652827E-07
|
||||
endif
|
||||
|
||||
i1 = mm
|
||||
i0 = mm
|
||||
do i=mm,1,-1
|
||||
|
||||
best = 0.d0
|
||||
if(best .lt. ten( i1, 1 ))then
|
||||
jg( 0, i, 1) = 0
|
||||
jg( 1, i, 1) = is1 - 2 + j1( i1, 1 )
|
||||
jg( 2, i, 1) = is2 - 2 + j2( i1, 1 )
|
||||
jg( 3, i, 1) = is3 - 2 + j3( i1, 1 )
|
||||
i1 = i1-1
|
||||
else
|
||||
jg( 0, i, 1) = 0
|
||||
jg( 1, i, 1) = 0
|
||||
jg( 2, i, 1) = 0
|
||||
jg( 3, i, 1) = 0
|
||||
endif
|
||||
|
||||
best = 1.d0
|
||||
if(best .gt. ten( i0, 0 ))then
|
||||
jg( 0, i, 0) = 0
|
||||
jg( 1, i, 0) = is1 - 2 + j1( i0, 0 )
|
||||
jg( 2, i, 0) = is2 - 2 + j2( i0, 0 )
|
||||
jg( 3, i, 0) = is3 - 2 + j3( i0, 0 )
|
||||
i0 = i0-1
|
||||
else
|
||||
jg( 0, i, 0) = 0
|
||||
jg( 1, i, 0) = 0
|
||||
jg( 2, i, 0) = 0
|
||||
jg( 3, i, 0) = 0
|
||||
endif
|
||||
|
||||
enddo
|
||||
|
||||
!DVM$ region
|
||||
!DVM$ PARALLEL (i3,i2,i1) ON z(i1,i2,i3), private(i),cuda_block(32,6)
|
||||
do i3=1,n3
|
||||
do i2=1,n2
|
||||
do i1=1,n1
|
||||
z(i1,i2,i3) = 0.0D0
|
||||
do i = mm,1,-1
|
||||
if(i1 .eq. jg(1,i,0) .and. i2 .eq. jg(2,i,0)
|
||||
& .and. i3 .eq. jg(3,i,0)) then
|
||||
z(i1,i2,i3) = -1.0D0
|
||||
endif
|
||||
if(i1 .eq. jg(1,i,1) .and. i2 .eq. jg(2,i,1)
|
||||
& .and. i3 .eq. jg(3,i,1)) then
|
||||
z(i1,i2,i3) = 1.0D0
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
|
||||
|
||||
call comm3(z,n1,n2,n3,k)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c call showall(z,n1,n2,n3)
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
@@ -0,0 +1,4 @@
|
||||
integer dvm_debug
|
||||
C dvm_debug = 0 - standard mode, dvm_debug > 0 - debugging mode
|
||||
parameter (dvm_debug=0)
|
||||
|
||||
@@ -0,0 +1,21 @@
|
||||
c---------------------------------------------------------------------
|
||||
c FDVM specifications
|
||||
c---------------------------------------------------------------------
|
||||
!! integer pv,pv1,pu1,pr1,pu(maxlevel),pr(maxlevel),pus,pus1
|
||||
integer psize(3),pdim
|
||||
common /pointers/ pv,pu1,pr1,pu,pr,pv1,pus,pus1
|
||||
common/processors/ psize,pdim
|
||||
DOUBLE PRECISION,POINTER,DIMENSION(:,:,:) ::
|
||||
& pv,pu1,pr1,pv1,pus,pus1
|
||||
!! DOUBLE PRECISION,POINTER,DIMENSION(:,:,:) ::
|
||||
!! & pu(maxlevel),pr(maxlevel)
|
||||
CDVM$ TEMPLATE tmp (1+2**(lt+1),1+2**(lt+1),1+2**(lt+1))
|
||||
CDVM$ DISTRIBUTE tmp (*,*,BLOCK)
|
||||
CDVM$ ALIGN :: pr1,pv,pu1,pus,pus1
|
||||
CDVM$ DYNAMIC pv,tmp,pus,pus1,pu1,pr1
|
||||
TYPE P
|
||||
DOUBLE PRECISION,POINTER,DIMENSION(:,:,:) :: p
|
||||
CDVM$ ALIGN :: p
|
||||
CDVM$ DYNAMIC p
|
||||
END TYPE
|
||||
TYPE(P) pu(maxlevel),pr(maxlevel)
|
||||
@@ -0,0 +1,52 @@
|
||||
c---------------------------------------------------------------------
|
||||
c Parameter lm (declared and set in "npbparams.h") is the log-base2 of
|
||||
c the edge size max for the partition on a given node, so must be changed
|
||||
c either to save space (if running a small case) or made bigger for larger
|
||||
c cases, for example, 512^3. Thus lm=7 means that the largest dimension
|
||||
c of a partition that can be solved on a node is 2^7 = 128. lm is set
|
||||
c automatically in npbparams.h
|
||||
c Parameters ndim1, ndim2, ndim3 are the local problem dimensions.
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'npbparams.h'
|
||||
! nm - actual dimension including ghost cells for communications
|
||||
! nv - size of rhs array
|
||||
! nr - size of residual array
|
||||
! nm2 - size of communication buffer
|
||||
! maxlevel- maximum number of levels
|
||||
integer nm
|
||||
> , nv
|
||||
> , nr
|
||||
> , nm2
|
||||
> , maxlevel
|
||||
|
||||
parameter( nm=2+2**lm, nv=(2+2**ndim1)*(2+2**ndim2)*(2+2**ndim3) )
|
||||
parameter( nm2=2*nm*nm, maxlevel=11 )
|
||||
parameter( nr = (8*(nv+nm**2+5*nm+7*lm))/7 )
|
||||
c---------------------------------------------------------------------
|
||||
integer nx(maxlevel),ny(maxlevel),nz(maxlevel)
|
||||
common /mg3/ nx,ny,nz
|
||||
|
||||
character class
|
||||
common /ClassType/class
|
||||
|
||||
integer debug_vec(0:7)
|
||||
common /my_debug/ debug_vec
|
||||
|
||||
integer ir(maxlevel), m1(maxlevel), m2(maxlevel), m3(maxlevel)
|
||||
integer lt, lb, mi(3,maxlevel),nreq,lbdvm
|
||||
common /fap/ ir,m1,m2,m3,lt,lb,mi,nreq,lbdvm
|
||||
logical proc1
|
||||
parameter (proc1 = .TRUE.)
|
||||
c---------------------------------------------------------------------
|
||||
c Set at m=1024, can handle cases up to 1024^3 case
|
||||
c---------------------------------------------------------------------
|
||||
integer m
|
||||
parameter( m=1037 )
|
||||
|
||||
double precision buff(nm2,4)
|
||||
common /buffer/ buff
|
||||
|
||||
|
||||
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -0,0 +1,44 @@
|
||||
SHELL=/bin/sh
|
||||
BENCHMARK=sp
|
||||
BENCHMARKU=SP
|
||||
|
||||
include ../config/make.def
|
||||
include ../sys/make.common
|
||||
|
||||
SOURCES = sp.for \
|
||||
set_constants.for \
|
||||
initialize.for \
|
||||
exact_rhs.for \
|
||||
compute_rhs.for \
|
||||
verify.for \
|
||||
compute_errors.for \
|
||||
timers.for \
|
||||
print_result.for
|
||||
|
||||
SOURCES_SINGLE = z_solve.for x_solve.for y_solve.for
|
||||
SOURCES_MPI = x_solve_mpi.for y_solve_mpi.for z_solve_mpi.for
|
||||
|
||||
OBJS = ${SOURCES:.for=.o}
|
||||
OBJS_SINGLE = ${SOURCES_SINGLE:.for=.o}
|
||||
OBJS_MPI = ${SOURCES_MPI:.for=.o}
|
||||
|
||||
${PROGRAM}: config
|
||||
@if [ $(VERSION) = MPI ] ; then \
|
||||
${MAKE} MPI_VER; \
|
||||
else \
|
||||
${MAKE} SINGLE_VER; \
|
||||
fi
|
||||
|
||||
MPI_VER: $(OBJS) $(OBJS_MPI)
|
||||
${FLINK} -o ${PROGRAM} ${OBJS} $(OBJS_MPI)
|
||||
|
||||
SINGLE_VER: $(OBJS) $(OBJS_SINGLE)
|
||||
${FLINK} -o ${PROGRAM} ${OBJS} $(OBJS_SINGLE)
|
||||
|
||||
%.o: %.for npbparams.h header.h
|
||||
${F77} ${FFLAGS} -c -o $@ $<
|
||||
|
||||
clean:
|
||||
rm -f npbparams.h
|
||||
rm -f *.o *~
|
||||
rm -f *.cu *.cuf *.c *.f
|
||||
@@ -0,0 +1,12 @@
|
||||
@echo off
|
||||
|
||||
set CLASS=%1
|
||||
set OPT=%2
|
||||
|
||||
CALL ..\sys\setparams SP %CLASS%
|
||||
CALL %F77% %OPT% sp 1>out_%CLASS%.txt 2>err_%CLASS%.txt
|
||||
if exist sp.exe (
|
||||
copy sp.exe %BIN%\sp.%CLASS%.x.exe
|
||||
del sp.exe
|
||||
)
|
||||
|
||||
@@ -0,0 +1,116 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine error_norm(rms)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c this function computes the norm of the difference between the
|
||||
c computed solution and the exact solution
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'header.h'
|
||||
|
||||
integer i, j, k, m, d
|
||||
double precision xi, eta, zeta, u_exact(5), rms(5), add
|
||||
double precision r1,r2,r3,r4,r5
|
||||
do m = 1, 5
|
||||
rms(m) = 0.0d0
|
||||
enddo
|
||||
r1 = 0.0d0
|
||||
r2 = 0.0d0
|
||||
r3 = 0.0d0
|
||||
r4 = 0.0d0
|
||||
r5 = 0.0d0
|
||||
!DVM$ region
|
||||
!DVM$ parallel (k,j,i) on u(*,i,j,k),private(zeta,eta,xi,add,u_exact,m)
|
||||
!DVM$& ,reduction(SUM(r1),SUM(r2),SUM(r3),SUM(r4),SUM(r5))
|
||||
! DVM$& ,shadow_renew(u, rhs)
|
||||
do k = 0, problem_size-1
|
||||
do j = 0, problem_size-1
|
||||
do i = 0, problem_size-1
|
||||
zeta = dble(k) * dnzm1
|
||||
eta = dble(j) * dnym1
|
||||
xi = dble(i) * dnxm1
|
||||
do m = 1, 5
|
||||
u_exact(m) = ce(m,1) +
|
||||
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
|
||||
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
|
||||
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
|
||||
add = u(1,i,j,k)-u_exact(1)
|
||||
r1 = r1 + add*add
|
||||
add = u(2,i,j,k)-u_exact(2)
|
||||
r2 = r2 + add*add
|
||||
add = u(3,i,j,k)-u_exact(3)
|
||||
r3 = r3 + add*add
|
||||
add = u(4,i,j,k)-u_exact(4)
|
||||
r4 = r4 + add*add
|
||||
add = u(5,i,j,k)-u_exact(5)
|
||||
r5 = r5 + add*add
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
!DVM$ end region
|
||||
|
||||
rms(1) = r1
|
||||
rms(2) = r2
|
||||
rms(3) = r3
|
||||
rms(4) = r4
|
||||
rms(5) = r5
|
||||
do m = 1, 5
|
||||
do d = 1, 3
|
||||
rms(m) = rms(m) / dble(grid_points(d)-2)
|
||||
end do
|
||||
rms(m) = dsqrt(rms(m))
|
||||
end do
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
|
||||
subroutine rhs_norm(rms)
|
||||
|
||||
include 'header.h'
|
||||
|
||||
integer i, j, k, d, m
|
||||
double precision rms(5), add
|
||||
|
||||
do m = 1, 5
|
||||
rms(m) = 0.0d0
|
||||
enddo
|
||||
|
||||
!DVM$ region
|
||||
!DVM$ parallel (k,j,i) on u(*,i,j,k),private(add)
|
||||
!DVM$& ,reduction(SUM(rms))
|
||||
do k = 1, nz2
|
||||
do j = 1, ny2
|
||||
do i = 1, nx2
|
||||
add = rhs(1,i,j,k)
|
||||
rms(1) = rms(1) + add*add
|
||||
add = rhs(2,i,j,k)
|
||||
rms(2) = rms(2) + add*add
|
||||
add = rhs(3,i,j,k)
|
||||
rms(3) = rms(3) + add*add
|
||||
add = rhs(4,i,j,k)
|
||||
rms(4) = rms(4) + add*add
|
||||
add = rhs(5,i,j,k)
|
||||
rms(5) = rms(5) + add*add
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
!DVM$ end region
|
||||
do m = 1, 5
|
||||
do d = 1, 3
|
||||
rms(m) = rms(m) / dble(grid_points(d)-2)
|
||||
end do
|
||||
rms(m) = dsqrt(rms(m))
|
||||
end do
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,339 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine compute_rhs(aditional_comp)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'header.h'
|
||||
|
||||
integer i, j, k, m
|
||||
double precision aux, rho_inv, uijk, up1, um1, vijk, vp1, vm1,
|
||||
> wijk, wp1, wm1,rhs_(5)
|
||||
double precision t1, t2, t3, ac, ru1, uu, vv, ww,ac2inv
|
||||
integer aditional_comp
|
||||
|
||||
if (timeron) call timer_start(t_rhs)
|
||||
|
||||
!DVM$ region out(us,vs,ws,qs,rho_i,speed,square)
|
||||
|
||||
!DVM$ parallel (k,j,i) on u(*,i,j,k),private(rho_inv,aux,m)
|
||||
!DVM$& ,shadow_renew(u(0:0,2:3,2:3,2:3)),SHADOW_COMPUTE
|
||||
do k = 0, problem_size-1
|
||||
do j = 0, problem_size-1
|
||||
do i = 0, problem_size-1
|
||||
rho_inv = 1.0d0/u(1,i,j,k)
|
||||
rho_i(i,j,k) = rho_inv
|
||||
us(i,j,k) = u(2,i,j,k) * rho_inv
|
||||
vs(i,j,k) = u(3,i,j,k) * rho_inv
|
||||
ws(i,j,k) = u(4,i,j,k) * rho_inv
|
||||
square(i,j,k) = 0.5d0* (
|
||||
> u(2,i,j,k)*u(2,i,j,k) +
|
||||
> u(3,i,j,k)*u(3,i,j,k) +
|
||||
> u(4,i,j,k)*u(4,i,j,k) ) * rho_inv
|
||||
qs(i,j,k) = square(i,j,k) * rho_inv
|
||||
c---------------------------------------------------------------------
|
||||
c (don't need speed and ainx until the lhs computation)
|
||||
c---------------------------------------------------------------------
|
||||
aux = c1c2*rho_inv* (u(5,i,j,k) - square(i,j,k))
|
||||
speed(i,j,k) = dsqrt(aux)
|
||||
do m = 1, 5
|
||||
rhs(m,i,j,k) = forcing(m,i,j,k)
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
!DVM$ parallel (k,j,i) on rhs(*,i,j,k),private(uijk,up1,um1,m
|
||||
!DVM$& ,vijk,vp1,vm1,wijk,wp1,wm1,rhs_,
|
||||
!DVM$& t1, t2, t3, ac, ru1, uu, vv, ww,ac2inv), CUDA_BLOCK(32,4)
|
||||
do k = 1, nz2
|
||||
do j = 1, ny2
|
||||
do i = 1, nx2
|
||||
uijk = us(i,j,k)
|
||||
up1 = us(i+1,j,k)
|
||||
um1 = us(i-1,j,k)
|
||||
|
||||
rhs_(1) = rhs(1,i,j,k)
|
||||
rhs_(2) = rhs(2,i,j,k)
|
||||
rhs_(3) = rhs(3,i,j,k)
|
||||
rhs_(4) = rhs(4,i,j,k)
|
||||
rhs_(5) = rhs(5,i,j,k)
|
||||
|
||||
rhs_(1) = rhs_(1) + dx1tx1 *
|
||||
> (u(1,i+1,j,k) - 2.0d0*u(1,i,j,k) +
|
||||
> u(1,i-1,j,k)) -
|
||||
> tx2 * (u(2,i+1,j,k) - u(2,i-1,j,k))
|
||||
|
||||
rhs_(2) = rhs_(2) + dx2tx1 *
|
||||
> (u(2,i+1,j,k) - 2.0d0*u(2,i,j,k) +
|
||||
> u(2,i-1,j,k)) +
|
||||
> xxcon2*con43 * (up1 - 2.0d0*uijk + um1) -
|
||||
> tx2 * (u(2,i+1,j,k)*up1 -
|
||||
> u(2,i-1,j,k)*um1 +
|
||||
> (u(5,i+1,j,k)- square(i+1,j,k)-
|
||||
> u(5,i-1,j,k)+ square(i-1,j,k))*
|
||||
> c2)
|
||||
|
||||
rhs_(3) = rhs_(3) + dx3tx1 *
|
||||
> (u(3,i+1,j,k) - 2.0d0*u(3,i,j,k) +
|
||||
> u(3,i-1,j,k)) +
|
||||
> xxcon2 * (vs(i+1,j,k) - 2.0d0*vs(i,j,k) +
|
||||
> vs(i-1,j,k)) -
|
||||
> tx2 * (u(3,i+1,j,k)*up1 -
|
||||
> u(3,i-1,j,k)*um1)
|
||||
|
||||
rhs_(4) = rhs_(4) + dx4tx1 *
|
||||
> (u(4,i+1,j,k) - 2.0d0*u(4,i,j,k) +
|
||||
> u(4,i-1,j,k)) +
|
||||
> xxcon2 * (ws(i+1,j,k) - 2.0d0*ws(i,j,k) +
|
||||
> ws(i-1,j,k)) -
|
||||
> tx2 * (u(4,i+1,j,k)*up1 -
|
||||
> u(4,i-1,j,k)*um1)
|
||||
|
||||
rhs_(5) = rhs_(5) + dx5tx1 *
|
||||
> (u(5,i+1,j,k) - 2.0d0*u(5,i,j,k) +
|
||||
> u(5,i-1,j,k)) +
|
||||
> xxcon3 * (qs(i+1,j,k) - 2.0d0*qs(i,j,k) +
|
||||
> qs(i-1,j,k)) +
|
||||
> xxcon4 * (up1*up1 - 2.0d0*uijk*uijk +
|
||||
> um1*um1) +
|
||||
> xxcon5 * (u(5,i+1,j,k)*rho_i(i+1,j,k) -
|
||||
> 2.0d0*u(5,i,j,k)*rho_i(i,j,k) +
|
||||
> u(5,i-1,j,k)*rho_i(i-1,j,k)) -
|
||||
> tx2 * ( (c1*u(5,i+1,j,k) -
|
||||
> c2*square(i+1,j,k))*up1 -
|
||||
> (c1*u(5,i-1,j,k) -
|
||||
> c2*square(i-1,j,k))*um1 )
|
||||
|
||||
if(i .eq. 1) then
|
||||
do m = 1, 5
|
||||
rhs_(m) = rhs_(m)- dssp *
|
||||
> ( 5.0d0*u(m,i,j,k) - 4.0d0*u(m,i+1,j,k) +
|
||||
> u(m,i+2,j,k))
|
||||
end do
|
||||
elseif(i .eq. 2) then
|
||||
do m = 1, 5
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> (-4.0d0*u(m,i-1,j,k) + 6.0d0*u(m,i,j,k) -
|
||||
> 4.0d0*u(m,i+1,j,k) + u(m,i+2,j,k))
|
||||
end do
|
||||
elseif(i .ge. 3 .and. i .le. nx2-2) then
|
||||
do m = 1, 5
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> ( u(m,i-2,j,k) - 4.0d0*u(m,i-1,j,k) +
|
||||
> 6.0*u(m,i,j,k) - 4.0d0*u(m,i+1,j,k) +
|
||||
> u(m,i+2,j,k) )
|
||||
end do
|
||||
elseif(i .eq. nx2-1) then
|
||||
do m = 1, 5
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> ( u(m,i-2,j,k) - 4.0d0*u(m,i-1,j,k) +
|
||||
> 6.0d0*u(m,i,j,k) - 4.0d0*u(m,i+1,j,k) )
|
||||
end do
|
||||
elseif( i .eq. nx2) then
|
||||
do m = 1, 5
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> ( u(m,i-2,j,k) - 4.d0*u(m,i-1,j,k) +
|
||||
> 5.d0*u(m,i,j,k) )
|
||||
end do
|
||||
endif
|
||||
|
||||
vijk = vs(i,j,k)
|
||||
vp1 = vs(i,j+1,k)
|
||||
vm1 = vs(i,j-1,k)
|
||||
rhs_(1) = rhs_(1) + dy1ty1 *
|
||||
> (u(1,i,j+1,k) - 2.0d0*u(1,i,j,k) +
|
||||
> u(1,i,j-1,k)) -
|
||||
> ty2 * (u(3,i,j+1,k) - u(3,i,j-1,k))
|
||||
rhs_(2) = rhs_(2) + dy2ty1 *
|
||||
> (u(2,i,j+1,k) - 2.0d0*u(2,i,j,k) +
|
||||
> u(2,i,j-1,k)) +
|
||||
> yycon2 * (us(i,j+1,k) - 2.0d0*us(i,j,k) +
|
||||
> us(i,j-1,k)) -
|
||||
> ty2 * (u(2,i,j+1,k)*vp1 -
|
||||
> u(2,i,j-1,k)*vm1)
|
||||
rhs_(3) = rhs_(3) + dy3ty1 *
|
||||
> (u(3,i,j+1,k) - 2.0d0*u(3,i,j,k) +
|
||||
> u(3,i,j-1,k)) +
|
||||
> yycon2*con43 * (vp1 - 2.0d0*vijk + vm1) -
|
||||
> ty2 * (u(3,i,j+1,k)*vp1 -
|
||||
> u(3,i,j-1,k)*vm1 +
|
||||
> (u(5,i,j+1,k) - square(i,j+1,k) -
|
||||
> u(5,i,j-1,k) + square(i,j-1,k))
|
||||
> *c2)
|
||||
rhs_(4) = rhs_(4) + dy4ty1 *
|
||||
> (u(4,i,j+1,k) - 2.0d0*u(4,i,j,k) +
|
||||
> u(4,i,j-1,k)) +
|
||||
> yycon2 * (ws(i,j+1,k) - 2.0d0*ws(i,j,k) +
|
||||
> ws(i,j-1,k)) -
|
||||
> ty2 * (u(4,i,j+1,k)*vp1 -
|
||||
> u(4,i,j-1,k)*vm1)
|
||||
rhs_(5) = rhs_(5) + dy5ty1 *
|
||||
> (u(5,i,j+1,k) - 2.0d0*u(5,i,j,k) +
|
||||
> u(5,i,j-1,k)) +
|
||||
> yycon3 * (qs(i,j+1,k) - 2.0d0*qs(i,j,k) +
|
||||
> qs(i,j-1,k)) +
|
||||
> yycon4 * (vp1*vp1 - 2.0d0*vijk*vijk +
|
||||
> vm1*vm1) +
|
||||
> yycon5 * (u(5,i,j+1,k)*rho_i(i,j+1,k) -
|
||||
> 2.0d0*u(5,i,j,k)*rho_i(i,j,k) +
|
||||
> u(5,i,j-1,k)*rho_i(i,j-1,k)) -
|
||||
> ty2 * ((c1*u(5,i,j+1,k) -
|
||||
> c2*square(i,j+1,k)) * vp1 -
|
||||
> (c1*u(5,i,j-1,k) -
|
||||
> c2*square(i,j-1,k)) * vm1)
|
||||
|
||||
if(j .eq. 1) then
|
||||
do m = 1, 5
|
||||
rhs_(m) = rhs_(m)- dssp *
|
||||
> ( 5.0d0*u(m,i,j,k) - 4.0d0*u(m,i,j+1,k) +
|
||||
> u(m,i,j+2,k))
|
||||
end do
|
||||
elseif(j .eq. 2) then
|
||||
do m = 1, 5
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> (-4.0d0*u(m,i,j-1,k) + 6.0d0*u(m,i,j,k) -
|
||||
> 4.0d0*u(m,i,j+1,k) + u(m,i,j+2,k))
|
||||
end do
|
||||
elseif(j .ge. 3 .and. j .le. ny2-2) then
|
||||
do m = 1, 5
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> ( u(m,i,j-2,k) - 4.0d0*u(m,i,j-1,k) +
|
||||
> 6.0*u(m,i,j,k) - 4.0d0*u(m,i,j+1,k) +
|
||||
> u(m,i,j+2,k) )
|
||||
end do
|
||||
elseif(j .eq. ny2-1) then
|
||||
do m = 1, 5
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> ( u(m,i,j-2,k) - 4.0d0*u(m,i,j-1,k) +
|
||||
> 6.0d0*u(m,i,j,k) - 4.0d0*u(m,i,j+1,k) )
|
||||
end do
|
||||
elseif(j .eq. ny2) then
|
||||
do m = 1, 5
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> ( u(m,i,j-2,k) - 4.d0*u(m,i,j-1,k) +
|
||||
> 5.d0*u(m,i,j,k) )
|
||||
end do
|
||||
endif
|
||||
|
||||
wijk = ws(i,j,k)
|
||||
wp1 = ws(i,j,k+1)
|
||||
wm1 = ws(i,j,k-1)
|
||||
|
||||
rhs_(1) = rhs_(1) + dz1tz1 *
|
||||
> (u(1,i,j,k+1) - 2.0d0*u(1,i,j,k) +
|
||||
> u(1,i,j,k-1)) -
|
||||
> tz2 * (u(4,i,j,k+1) - u(4,i,j,k-1))
|
||||
rhs_(2) = rhs_(2) + dz2tz1 *
|
||||
> (u(2,i,j,k+1) - 2.0d0*u(2,i,j,k) +
|
||||
> u(2,i,j,k-1)) +
|
||||
> zzcon2 * (us(i,j,k+1) - 2.0d0*us(i,j,k) +
|
||||
> us(i,j,k-1)) -
|
||||
> tz2 * (u(2,i,j,k+1)*wp1 -
|
||||
> u(2,i,j,k-1)*wm1)
|
||||
rhs_(3) = rhs_(3) + dz3tz1 *
|
||||
> (u(3,i,j,k+1) - 2.0d0*u(3,i,j,k) +
|
||||
> u(3,i,j,k-1)) +
|
||||
> zzcon2 * (vs(i,j,k+1) - 2.0d0*vs(i,j,k) +
|
||||
> vs(i,j,k-1)) -
|
||||
> tz2 * (u(3,i,j,k+1)*wp1 -
|
||||
> u(3,i,j,k-1)*wm1)
|
||||
rhs_(4) = rhs_(4) + dz4tz1 *
|
||||
> (u(4,i,j,k+1) - 2.0d0*u(4,i,j,k) +
|
||||
> u(4,i,j,k-1)) +
|
||||
> zzcon2*con43 * (wp1 - 2.0d0*wijk + wm1) -
|
||||
> tz2 * (u(4,i,j,k+1)*wp1 -
|
||||
> u(4,i,j,k-1)*wm1 +
|
||||
> (u(5,i,j,k+1) - square(i,j,k+1) -
|
||||
> u(5,i,j,k-1) + square(i,j,k-1))
|
||||
> *c2)
|
||||
rhs_(5) = rhs_(5) + dz5tz1 *
|
||||
> (u(5,i,j,k+1) - 2.0d0*u(5,i,j,k) +
|
||||
> u(5,i,j,k-1)) +
|
||||
> zzcon3 * (qs(i,j,k+1) - 2.0d0*qs(i,j,k) +
|
||||
> qs(i,j,k-1)) +
|
||||
> zzcon4 * (wp1*wp1 - 2.0d0*wijk*wijk +
|
||||
> wm1*wm1) +
|
||||
> zzcon5 * (u(5,i,j,k+1)*rho_i(i,j,k+1) -
|
||||
> 2.0d0*u(5,i,j,k)*rho_i(i,j,k) +
|
||||
> u(5,i,j,k-1)*rho_i(i,j,k-1)) -
|
||||
> tz2 * ( (c1*u(5,i,j,k+1) -
|
||||
> c2*square(i,j,k+1))*wp1 -
|
||||
> (c1*u(5,i,j,k-1) -
|
||||
> c2*square(i,j,k-1))*wm1)
|
||||
|
||||
if(k .eq. 1) then
|
||||
do m = 1, 5
|
||||
rhs_(m) = rhs_(m)- dssp *
|
||||
> ( 5.0d0*u(m,i,j,k) - 4.0d0*u(m,i,j,k+1) +
|
||||
> u(m,i,j,k+2))
|
||||
end do
|
||||
elseif(k .eq. 2) then
|
||||
do m = 1, 5
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> (-4.0d0*u(m,i,j,k-1) + 6.0d0*u(m,i,j,k) -
|
||||
> 4.0d0*u(m,i,j,k+1) + u(m,i,j,k+2))
|
||||
end do
|
||||
elseif(k .ge. 3 .and. k .le. nz2-2) then
|
||||
do m = 1, 5
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> ( u(m,i,j,k-2) - 4.0d0*u(m,i,j,k-1) +
|
||||
> 6.0*u(m,i,j,k) - 4.0d0*u(m,i,j,k+1) +
|
||||
> u(m,i,j,k+2) )
|
||||
end do
|
||||
elseif(k .eq. nz2-1) then
|
||||
do m = 1, 5
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> ( u(m,i,j,k-2) - 4.0d0*u(m,i,j,k-1) +
|
||||
> 6.0d0*u(m,i,j,k) - 4.0d0*u(m,i,j,k+1) )
|
||||
end do
|
||||
else
|
||||
do m = 1, 5
|
||||
rhs_(m) = rhs_(m) - dssp *
|
||||
> ( u(m,i,j,k-2) - 4.d0*u(m,i,j,k-1) +
|
||||
> 5.d0*u(m,i,j,k) )
|
||||
end do
|
||||
endif
|
||||
|
||||
rhs_(1) = rhs_(1) * dt
|
||||
rhs_(2) = rhs_(2) * dt
|
||||
rhs_(3) = rhs_(3) * dt
|
||||
rhs_(4) = rhs_(4) * dt
|
||||
rhs_(5) = rhs_(5) * dt
|
||||
|
||||
rhs(1,i,j,k) = rhs_(1)
|
||||
rhs(2,i,j,k) = rhs_(2)
|
||||
rhs(3,i,j,k) = rhs_(3)
|
||||
rhs(4,i,j,k) = rhs_(4)
|
||||
rhs(5,i,j,k) = rhs_(5)
|
||||
|
||||
if(aditional_comp .eq. 1) then
|
||||
ru1 = rho_i(i,j,k)
|
||||
uu = us(i,j,k)
|
||||
vv = vs(i,j,k)
|
||||
ww = ws(i,j,k)
|
||||
ac = speed(i,j,k)
|
||||
ac2inv = ac*ac
|
||||
|
||||
t1 = c2 / ac2inv * ( qs(i,j,k)*rhs_(1)-uu*rhs_(2)-
|
||||
> vv*rhs_(3)- ww*rhs_(4) + rhs_(5) )
|
||||
t2 = bt * ru1 * ( uu * rhs_(1) - rhs_(2) )
|
||||
t3 = ( bt * ru1 * ac ) * t1
|
||||
|
||||
rhs(1,i,j,k) = rhs_(1) - t1
|
||||
rhs(2,i,j,k) = - ru1 * ( ww*rhs_(1) - rhs_(4))
|
||||
rhs(3,i,j,k) = ru1 * ( vv*rhs_(1) - rhs_(3))
|
||||
rhs(4,i,j,k) = - t2 + t3
|
||||
rhs(5,i,j,k) = t2 + t3
|
||||
endif
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
!DVM$ end region
|
||||
if (timeron) call timer_stop(t_rhs)
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,307 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine exact_rhs
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
|
||||
include 'header.h'
|
||||
|
||||
double precision dtemp(5), xi, eta, zeta, dtpp
|
||||
integer m, i, j, k, ip1, im1, jp1, p, p1,
|
||||
> jm1, km1, kp1,z
|
||||
double precision ue_(-2:2, 5), buf_(-2:2, 5),cuf_(-2:2),q_(-2:2)
|
||||
|
||||
|
||||
!DVM$ region
|
||||
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(m)
|
||||
do k= 0, problem_size-1
|
||||
do j = 0, problem_size-1
|
||||
do i = 0, problem_size-1
|
||||
do m = 1, 5
|
||||
forcing(m,i,j,k) = 0.0d0
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c xi-direction flux differences
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(zeta,eta,xi,m,dtemp
|
||||
!DVM$& ,buf_,cuf_,q_,dtpp,z,ue_)
|
||||
do k = 1, problem_size-2
|
||||
do j = 1, problem_size-2
|
||||
do i = 1, problem_size-2
|
||||
zeta = dble(k) * dnzm1
|
||||
eta = dble(j) * dnym1
|
||||
do z = -2, 2
|
||||
xi = dble(i + z) * dnxm1
|
||||
do m = 1, 5
|
||||
dtemp(m) = ce(m,1) +
|
||||
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
|
||||
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
|
||||
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
do m = 1, 5
|
||||
ue_(z,m) = dtemp(m)
|
||||
end do
|
||||
dtpp = 1.0d0 / dtemp(1)
|
||||
do m = 2, 5
|
||||
buf_(z, m) = dtpp * dtemp(m)
|
||||
end do
|
||||
|
||||
cuf_(z) = buf_(z,2) * buf_(z,2)
|
||||
buf_(z,1) = cuf_(z) + buf_(z,3) * buf_(z,3) +
|
||||
> buf_(z,4) * buf_(z,4)
|
||||
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)*
|
||||
> ue_(z,3) + buf_(z,4)*ue_(z,4))
|
||||
enddo
|
||||
|
||||
forcing(1,i,j,k) = forcing(1,i,j,k) -
|
||||
> tx2*( ue_(1,2)-ue_(-1,2) )+
|
||||
> dx1tx1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
|
||||
|
||||
forcing(2,i,j,k) = forcing(2,i,j,k) - tx2 * (
|
||||
> (ue_(1,2)*buf_(1,2)+c2*(ue_(1,5)-q_(1)))-
|
||||
> (ue_(-1,2)*buf_(-1,2)+c2*(ue_(-1,5)-q_(-1))))+
|
||||
> xxcon1*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
|
||||
> dx2tx1*( ue_(1,2)-2.0d0* ue_(0,2)+ue_(-1,2))
|
||||
|
||||
forcing(3,i,j,k) = forcing(3,i,j,k) - tx2 * (
|
||||
> ue_(1,3)*buf_(1,2)-ue_(-1,3)*buf_(-1,2))+
|
||||
> xxcon2*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
|
||||
> dx3tx1*( ue_(1,3)-2.0d0*ue_(0,3) +ue_(-1,3))
|
||||
|
||||
forcing(4,i,j,k) = forcing(4,i,j,k) - tx2*(
|
||||
> ue_(1,4)*buf_(1,2)-ue_(-1,4)*buf_(-1,2))+
|
||||
> xxcon2*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
|
||||
> dx4tx1*( ue_(1,4)-2.0d0* ue_(0,4)+ ue_(-1,4))
|
||||
|
||||
forcing(5,i,j,k) = forcing(5,i,j,k) - tx2*(
|
||||
> buf_(1,2)*(c1*ue_(1,5)-c2*q_(1))-
|
||||
> buf_(-1,2)*(c1*ue_(-1,5)-c2*q_(-1)))+
|
||||
> 0.5d0*xxcon3*(buf_(1,1)-2.0d0*buf_(0,1)+
|
||||
> buf_(-1,1))+
|
||||
> xxcon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
|
||||
> xxcon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
|
||||
> dx5tx1*( ue_(1,5)-2.0d0* ue_(0,5)+ ue_(-1,5))
|
||||
do m = 1, 5
|
||||
if(i .eq. 1) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
|
||||
else if(i .eq. 2) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
|
||||
> 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
else if(i .eq. problem_size-3) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
|
||||
else if(i .eq. problem_size-2) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
|
||||
else
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp*
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
endif
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c eta-direction flux differences
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(zeta,eta,xi,m,dtemp
|
||||
!DVM$& ,buf_,cuf_,q_,dtpp,z,ue_)
|
||||
do k = 1, problem_size- 2
|
||||
do j = 1, problem_size-2
|
||||
do i = 1, problem_size- 2
|
||||
zeta = dble(k) * dnzm1
|
||||
xi = dble(i) * dnxm1
|
||||
do z = -2, 2
|
||||
eta = dble(j + z) * dnym1
|
||||
do m = 1, 5
|
||||
dtemp(m) = ce(m,1) +
|
||||
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
|
||||
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
|
||||
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
do m = 1, 5
|
||||
ue_(z,m) = dtemp(m)
|
||||
end do
|
||||
dtpp = 1.0d0 / dtemp(1)
|
||||
do m = 2, 5
|
||||
buf_(z, m) = dtpp * dtemp(m)
|
||||
end do
|
||||
|
||||
cuf_(z) = buf_(z,3) * buf_(z,3)
|
||||
buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) +
|
||||
> buf_(z,4) * buf_(z,4)
|
||||
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)
|
||||
> *ue_(z,3) + buf_(z,4) * ue_(z,4))
|
||||
enddo
|
||||
|
||||
forcing(1,i,j,k) = forcing(1,i,j,k) -
|
||||
> ty2*( ue_(1,3)-ue_(-1,3) )+
|
||||
> dy1ty1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
|
||||
|
||||
forcing(2,i,j,k) = forcing(2,i,j,k) - ty2*(
|
||||
> ue_(1,2)*buf_(1,3)-ue_(-1,2)*buf_(-1,3))+
|
||||
> yycon2*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
|
||||
> dy2ty1*( ue_(1,2)-2.0* ue_(0,2)+ ue_(-1,2))
|
||||
|
||||
forcing(3,i,j,k) = forcing(3,i,j,k) - ty2*(
|
||||
> (ue_(1,3)*buf_(1,3)+c2*(ue_(1,5)-q_(1)))-
|
||||
> (ue_(-1,3)*buf_(-1,3)+c2*(ue_(-1,5)-q_(-1))))+
|
||||
> yycon1*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
|
||||
> dy3ty1*( ue_(1,3)-2.0d0*ue_(0,3) +ue_(-1,3))
|
||||
|
||||
forcing(4,i,j,k) = forcing(4,i,j,k) - ty2*(
|
||||
> ue_(1,4)*buf_(1,3)-ue_(-1,4)*buf_(-1,3))+
|
||||
> yycon2*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
|
||||
> dy4ty1*( ue_(1,4)-2.0d0*ue_(0,4)+ ue_(-1,4))
|
||||
|
||||
forcing(5,i,j,k) = forcing(5,i,j,k) - ty2*(
|
||||
> buf_(1,3)*(c1*ue_(1,5)-c2*q_(1))-
|
||||
> buf_(-1,3)*(c1*ue_(-1,5)-c2*q_(-1)))+
|
||||
> 0.5d0*yycon3*(buf_(1,1)-2.0d0*buf_(0,1)+
|
||||
> buf_(-1,1))+
|
||||
> yycon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
|
||||
> yycon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
|
||||
> dy5ty1*(ue_(1,5)-2.0d0*ue_(0,5)+ue_(-1,5))
|
||||
do m = 1, 5
|
||||
if(j .eq. 1) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
|
||||
else if(j .eq. 2) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
|
||||
> 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
else if(j .eq. problem_size-3) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
|
||||
else if(j .eq. problem_size-2) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
|
||||
else
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp*
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
endif
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c zeta-direction flux differences
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(zeta,eta,xi,m
|
||||
!DVM$& ,buf_,cuf_,q_,ue_,dtpp,dtemp,z)
|
||||
do k = 1, problem_size-2
|
||||
do j = 1, problem_size-2
|
||||
do i = 1, problem_size-2
|
||||
xi = dble(i) * dnxm1
|
||||
eta = dble(j) * dnym1
|
||||
do z = -2, 2
|
||||
zeta = dble(k + z) * dnzm1
|
||||
do m = 1, 5
|
||||
dtemp(m) = ce(m,1) +
|
||||
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
|
||||
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
|
||||
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
do m = 1, 5
|
||||
ue_(z,m) = dtemp(m)
|
||||
end do
|
||||
dtpp = 1.0d0 / dtemp(1)
|
||||
do m = 2, 5
|
||||
buf_(z, m) = dtpp * dtemp(m)
|
||||
end do
|
||||
|
||||
cuf_(z) = buf_(z,4) * buf_(z,4)
|
||||
buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) +
|
||||
> buf_(z,3) * buf_(z,3)
|
||||
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)*
|
||||
> ue_(z,3) + buf_(z,4)*ue_(z,4))
|
||||
enddo
|
||||
|
||||
forcing(1,i,j,k) = forcing(1,i,j,k) -
|
||||
> tz2*( ue_(1,4)-ue_(-1,4) )+
|
||||
> dz1tz1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
|
||||
|
||||
forcing(2,i,j,k) = forcing(2,i,j,k) - tz2 * (
|
||||
> ue_(1,2)*buf_(1,4)-ue_(-1,2)*buf_(-1,4))+
|
||||
> zzcon2*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
|
||||
> dz2tz1*( ue_(1,2)-2.0d0* ue_(0,2)+ ue_(-1,2))
|
||||
|
||||
forcing(3,i,j,k) = forcing(3,i,j,k) - tz2 * (
|
||||
> ue_(1,3)*buf_(1,4)-ue_(-1,3)*buf_(-1,4))+
|
||||
> zzcon2*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
|
||||
> dz3tz1*(ue_(1,3)-2.0d0*ue_(0,3)+ue_(-1,3))
|
||||
|
||||
forcing(4,i,j,k) = forcing(4,i,j,k) - tz2 * (
|
||||
> (ue_(1,4)*buf_(1,4)+c2*(ue_(1,5)-q_(1)))-
|
||||
> (ue_(-1,4)*buf_(-1,4)+c2*(ue_(-1,5)-q_(-1))))+
|
||||
> zzcon1*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
|
||||
> dz4tz1*( ue_(1,4)-2.0d0*ue_(0,4) +ue_(-1,4))
|
||||
|
||||
forcing(5,i,j,k) = forcing(5,i,j,k) - tz2 * (
|
||||
> buf_(1,4)*(c1*ue_(1,5)-c2*q_(1))-
|
||||
> buf_(-1,4)*(c1*ue_(-1,5)-c2*q_(-1)))+
|
||||
> 0.5d0*zzcon3*(buf_(1,1)-2.0d0*buf_(0,1)
|
||||
> +buf_(-1,1))+
|
||||
> zzcon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
|
||||
> zzcon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
|
||||
> dz5tz1*( ue_(1,5)-2.0d0*ue_(0,5)+ ue_(-1,5))
|
||||
do m = 1, 5
|
||||
if(k .eq. 1) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
|
||||
else if(k .eq. 2) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
|
||||
> 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
else if(k .eq. problem_size-3) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
|
||||
else if(k .eq. problem_size-2) then
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
|
||||
else
|
||||
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp*
|
||||
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
|
||||
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
|
||||
endif
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c now change the sign of the forcing function,
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(m)
|
||||
do k = 1, problem_size-2
|
||||
do j = 1, problem_size-2
|
||||
do i = 1, problem_size-2
|
||||
do m = 1, 5
|
||||
forcing(m,i,j,k) = -1.d0 * forcing(m,i,j,k)
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
!DVM$ end region
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,120 @@
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c The following include file is generated automatically by the
|
||||
c "setparams" utility. It defines
|
||||
c problem_size: 12, 64, 102, 162 (for class T, A, B, C)
|
||||
c dt_default: default time step for this problem size if no
|
||||
c config file
|
||||
c niter_default: default number of iterations for this problem size
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'npbparams.h'
|
||||
|
||||
integer grid_points(3), nx2, ny2, nz2,stage_n
|
||||
common /global/ grid_points, nx2, ny2, nz2, timeron
|
||||
|
||||
double precision tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3,
|
||||
& dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4,
|
||||
& dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt,
|
||||
& ce(5,13), dxmax, dymax, dzmax, xxcon1, xxcon2,
|
||||
& xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1,
|
||||
& dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4,
|
||||
& yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1,
|
||||
& zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1,
|
||||
& dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1,
|
||||
& dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2,
|
||||
& c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt,
|
||||
& dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1,
|
||||
& c2dtty1, c2dttz1, comz1, comz4, comz5, comz6,
|
||||
& c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16
|
||||
|
||||
common /constants/ tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3,
|
||||
& dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4,
|
||||
& dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt,
|
||||
& ce, dxmax, dymax, dzmax, xxcon1, xxcon2,
|
||||
& xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1,
|
||||
& dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4,
|
||||
& yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1,
|
||||
& zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1,
|
||||
& dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1,
|
||||
& dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2,
|
||||
& c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt,
|
||||
& dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1,
|
||||
& c2dtty1, c2dttz1, comz1, comz4, comz5, comz6,
|
||||
& c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16,
|
||||
& stage_n
|
||||
|
||||
|
||||
integer IMAX, JMAX, KMAX, IMAXP, JMAXP
|
||||
|
||||
parameter (IMAX=problem_size,JMAX=problem_size,KMAX=problem_size)
|
||||
parameter (IMAXP=IMAX/2*2,JMAXP=JMAX/2*2)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c To improve cache performance, first two dimensions padded by 1
|
||||
c for even number sizes only
|
||||
c---------------------------------------------------------------------
|
||||
double precision
|
||||
& u (5, 0:IMAXP, 0:JMAXP, 0:KMAX),
|
||||
& us ( 0:IMAXP, 0:JMAXP, 0:KMAX),
|
||||
& vs ( 0:IMAXP, 0:JMAXP, 0:KMAX),
|
||||
& ws ( 0:IMAXP, 0:JMAXP, 0:KMAX),
|
||||
& qs ( 0:IMAXP, 0:JMAXP, 0:KMAX),
|
||||
& rho_i ( 0:IMAXP, 0:JMAXP, 0:KMAX),
|
||||
& speed ( 0:IMAXP, 0:JMAXP, 0:KMAX),
|
||||
& square ( 0:IMAXP, 0:JMAXP, 0:KMAX),
|
||||
& rhs (5, 0:IMAXP, 0:JMAXP, 0:KMAX),
|
||||
& forcing (5, 0:IMAXP, 0:JMAXP, 0:KMAX)
|
||||
|
||||
common /fields/ u, us, vs, ws, qs, rho_i, speed, square,
|
||||
& rhs, forcing
|
||||
|
||||
double precision cv(0:problem_size-1), rhon(0:problem_size-1),
|
||||
& rhos(0:problem_size-1), rhoq(0:problem_size-1),
|
||||
& cuf(0:problem_size-1), q(0:problem_size-1),
|
||||
& ue(0:problem_size-1,5), buf(0:problem_size-1,5),
|
||||
& rhon_(0:problem_size-1,0:problem_size-1),
|
||||
& cv_(0:problem_size-1,0:problem_size-1)
|
||||
common /work_1d/ cv,rhon,rhos,rhoq, cuf, q, ue, buf,rhon_,cv_
|
||||
|
||||
double precision
|
||||
& lhs(0:2,1:5,0:IMAXP, 0:JMAXP, 0:KMAX)
|
||||
common /work_lhs/ lhs
|
||||
|
||||
c-----------------------------------------------------------------------
|
||||
c Timer constants
|
||||
c-----------------------------------------------------------------------
|
||||
integer t_rhsx,t_rhsy,t_rhsz,t_xsolve,t_ysolve,t_zsolve,
|
||||
& t_rdis1,t_rdis2,t_tzetar,t_ninvr,t_pinvr,t_add,
|
||||
& t_rhs,t_txinvr,t_last,t_total
|
||||
logical timeron
|
||||
parameter (t_total = 1)
|
||||
parameter (t_rhsx = 2)
|
||||
parameter (t_rhsy = 3)
|
||||
parameter (t_rhsz = 4)
|
||||
parameter (t_rhs = 5)
|
||||
parameter (t_xsolve = 6)
|
||||
parameter (t_ysolve = 7)
|
||||
parameter (t_zsolve = 8)
|
||||
parameter (t_rdis1 = 9)
|
||||
parameter (t_rdis2 = 10)
|
||||
parameter (t_txinvr = 11)
|
||||
parameter (t_pinvr = 12)
|
||||
parameter (t_ninvr = 13)
|
||||
parameter (t_tzetar = 14)
|
||||
parameter (t_add = 15)
|
||||
parameter (t_last = 15)
|
||||
|
||||
!DVM$ SHADOW lhs(0:0,0:0,2:2,2:2,2:2)
|
||||
!DVM$ SHADOW (0:0,2:3,2:3,2:3) :: rhs,forcing,u
|
||||
!DVM$ SHADOW (2:3,2:3,2:3) :: qs,us,ws,vs,speed,square,rho_i
|
||||
|
||||
!DVM$ DISTRIBUTE u(*,BLOCK,BLOCK,BLOCK)
|
||||
!DVM$ ALIGN (*,i,j,k) WITH u(*,i,j,k) :: forcing,rhs
|
||||
!DVM$ ALIGN (*,*,i,j,k) WITH u(*,i,j,k) :: lhs
|
||||
!DVM$ ALIGN (i,j,k) WITH u(*,i,j,k) :: square,speed,rho_i,qs,ws,vs,us
|
||||
@@ -0,0 +1,189 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine initialize
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c This subroutine initializes the field variable u using
|
||||
c tri-linear transfinite interpolation of the boundary values
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'header.h'
|
||||
|
||||
integer i, j, k, m, ix, iy, iz
|
||||
double precision xi, eta, zeta, Pface(5,3,2), Pxi, Peta,
|
||||
> Pzeta, temp(5)
|
||||
|
||||
!DVM$ region
|
||||
!DVM$ parallel (k,j,i) on u(*,i,j,k), private(zeta, eta, xi, ix, pxi, m,
|
||||
!DVM$& pface, iy, peta, iz, pzeta, temp)
|
||||
do k = 0, problem_size-1
|
||||
do j = 0, problem_size-1
|
||||
do i = 0, problem_size-1
|
||||
u(1,i,j,k) = 1.0
|
||||
u(2,i,j,k) = 0.0
|
||||
u(3,i,j,k) = 0.0
|
||||
u(4,i,j,k) = 0.0
|
||||
u(5,i,j,k) = 1.0
|
||||
|
||||
zeta = dble(k) * dnzm1
|
||||
eta = dble(j) * dnym1
|
||||
xi = dble(i) * dnxm1
|
||||
|
||||
do ix = 1, 2
|
||||
Pxi = dble(ix-1)
|
||||
|
||||
do m = 1, 5
|
||||
Pface(m,1,ix) = ce(m,1) +
|
||||
> Pxi*(ce(m,2)+Pxi*(ce(m,5) +Pxi*(ce(m,8) +
|
||||
> Pxi*ce(m,11))))+eta*(ce(m,3) + eta*(ce(m,6)
|
||||
> + eta*(ce(m,9) + eta*ce(m,12))))+
|
||||
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
end do
|
||||
|
||||
do iy = 1, 2
|
||||
Peta = dble(iy-1)
|
||||
do m = 1, 5
|
||||
Pface(m,2,iy) = ce(m,1) +
|
||||
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) +
|
||||
> xi*ce(m,11)))) +
|
||||
> Peta*(ce(m,3) +Peta*(ce(m,6) +Peta*(ce(m,9)+
|
||||
> Peta*ce(m,12))))+
|
||||
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
end do
|
||||
|
||||
do iz = 1, 2
|
||||
Pzeta = dble(iz-1)
|
||||
do m = 1, 5
|
||||
Pface(m,3,iz) = ce(m,1) +
|
||||
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) +
|
||||
> xi*ce(m,11)))) +
|
||||
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9)+
|
||||
> eta*ce(m,12))))+
|
||||
> Pzeta*(ce(m,4) + Pzeta*(ce(m,7) + Pzeta*(ce(m,10) +
|
||||
> Pzeta*ce(m,13))))
|
||||
end do
|
||||
end do
|
||||
|
||||
do m = 1, 5
|
||||
Pxi = xi * Pface(m,1,2) +
|
||||
> (1.0d0-xi) * Pface(m,1,1)
|
||||
Peta = eta * Pface(m,2,2) +
|
||||
> (1.0d0-eta) * Pface(m,2,1)
|
||||
Pzeta = zeta * Pface(m,3,2) +
|
||||
> (1.0d0-zeta) * Pface(m,3,1)
|
||||
|
||||
u(m,i,j,k) = Pxi + Peta + Pzeta -
|
||||
> Pxi*Peta - Pxi*Pzeta - Peta*Pzeta +
|
||||
> Pxi*Peta*Pzeta
|
||||
end do
|
||||
|
||||
zeta = dble(k) * dnzm1
|
||||
eta = dble(j) * dnym1
|
||||
xi = 0.0d0
|
||||
if( i .eq. 0) then
|
||||
! call exact_solution(xi, eta, zeta, temp)
|
||||
do m = 1, 5
|
||||
temp(m) = ce(m,1) +
|
||||
> xi*(ce(m,2)+xi*(ce(m,5)+xi*(ce(m,8)+xi*ce(m,11))))+
|
||||
> eta*(ce(m,3)+eta*(ce(m,6)+eta*(ce(m,9)+eta*ce(m,12))))+
|
||||
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
do m = 1, 5
|
||||
u(m,i,j,k) = temp(m)
|
||||
end do
|
||||
endif
|
||||
|
||||
xi = 1.0d0
|
||||
if( i .eq. problem_size-1) then
|
||||
! call exact_solution(xi, eta, zeta, temp)
|
||||
do m = 1, 5
|
||||
temp(m) = ce(m,1) +
|
||||
> xi*(ce(m,2)+xi*(ce(m,5)+xi*(ce(m,8)+xi*ce(m,11))))+
|
||||
> eta*(ce(m,3)+eta*(ce(m,6)+eta*(ce(m,9)+eta*ce(m,12))))+
|
||||
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
do m = 1, 5
|
||||
u(m,i,j,k) = temp(m)
|
||||
end do
|
||||
endif
|
||||
|
||||
zeta = dble(k) * dnzm1
|
||||
eta = 0.0d0
|
||||
xi = dble(i) * dnxm1
|
||||
if( j .eq. 0) then
|
||||
! call exact_solution(xi, eta, zeta, temp)
|
||||
do m = 1, 5
|
||||
temp(m) = ce(m,1) +
|
||||
> xi*(ce(m,2)+xi*(ce(m,5)+xi*(ce(m,8)+xi*ce(m,11))))+
|
||||
> eta*(ce(m,3)+eta*(ce(m,6)+eta*(ce(m,9)+eta*ce(m,12))))
|
||||
> +zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
do m = 1, 5
|
||||
u(m,i,j,k) = temp(m)
|
||||
end do
|
||||
endif
|
||||
|
||||
eta = 1.0d0
|
||||
if( j .eq. problem_size-1) then
|
||||
! call exact_solution(xi, eta, zeta, temp)
|
||||
do m = 1, 5
|
||||
temp(m) = ce(m,1) +
|
||||
> xi*(ce(m,2)+xi*(ce(m,5)+xi*(ce(m,8)+xi*ce(m,11))))+
|
||||
> eta*(ce(m,3)+eta*(ce(m,6)+eta*(ce(m,9)+eta*ce(m,12))))
|
||||
> +zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
do m = 1, 5
|
||||
u(m,i,j,k) = temp(m)
|
||||
end do
|
||||
endif
|
||||
|
||||
zeta = 0.0d0
|
||||
eta = dble(j) * dnym1
|
||||
xi = dble(i) *dnxm1
|
||||
if( k .eq. 0) then
|
||||
! call exact_solution(xi, eta, zeta, temp)
|
||||
do m = 1, 5
|
||||
temp(m) = ce(m,1) +
|
||||
> xi*(ce(m,2)+xi*(ce(m,5)+xi*(ce(m,8)+xi*ce(m,11))))+
|
||||
> eta*(ce(m,3)+eta*(ce(m,6)+eta*(ce(m,9)+eta*ce(m,12))))
|
||||
> +zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
do m = 1, 5
|
||||
u(m,i,j,k) = temp(m)
|
||||
end do
|
||||
endif
|
||||
|
||||
zeta = 1.0d0
|
||||
if( k .eq. problem_size-1) then
|
||||
! call exact_solution(xi, eta, zeta, temp)
|
||||
do m = 1, 5
|
||||
temp(m) = ce(m,1) +
|
||||
> xi*(ce(m,2)+xi*(ce(m,5)+xi*(ce(m,8)+xi*ce(m,11))))+
|
||||
> eta*(ce(m,3)+eta*(ce(m,6)+eta*(ce(m,9)+eta*ce(m,12))))
|
||||
> +zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
|
||||
> zeta*ce(m,13))))
|
||||
end do
|
||||
do m = 1, 5
|
||||
u(m,i,j,k) = temp(m)
|
||||
end do
|
||||
endif
|
||||
end do
|
||||
end do
|
||||
end do
|
||||
!DVM$ end region
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,121 @@
|
||||
subroutine print_results(name, class, n1, n2, n3, niter,
|
||||
> t, mops, optype, verified, npbversion,
|
||||
> compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7)
|
||||
|
||||
implicit none
|
||||
character name*(*)
|
||||
character class*1
|
||||
integer n1, n2, n3, niter, j
|
||||
double precision t, mops
|
||||
character optype*24, size*15
|
||||
logical verified
|
||||
character*(*) npbversion, compiletime,
|
||||
> cs1, cs2, cs3, cs4, cs5, cs6, cs7
|
||||
integer num_threads, max_threads, i
|
||||
max_threads = 1
|
||||
num_threads = 1
|
||||
|
||||
write (*, 2) name
|
||||
2 format(//, ' ', A, ' Benchmark Completed.')
|
||||
|
||||
write (*, 3) Class
|
||||
3 format(' Class = ', 12x, a12)
|
||||
|
||||
c If this is not a grid-based problem (EP, FT, CG), then
|
||||
c we only print n1, which contains some measure of the
|
||||
c problem size. In that case, n2 and n3 are both zero.
|
||||
c Otherwise, we print the grid size n1xn2xn3
|
||||
|
||||
if ((n2 .eq. 0) .and. (n3 .eq. 0)) then
|
||||
if (name(1:2) .eq. 'EP') then
|
||||
write(size, '(f15.0)' ) 2.d0**n1
|
||||
j = 15
|
||||
if (size(j:j) .eq. '.') j = j - 1
|
||||
write (*,42) size(1:j)
|
||||
42 format(' Size = ',9x, a15)
|
||||
else
|
||||
write (*,44) n1
|
||||
44 format(' Size = ',12x, i12)
|
||||
endif
|
||||
else
|
||||
write (*, 4) n1,n2,n3
|
||||
4 format(' Size = ',9x, i4,'x',i4,'x',i4)
|
||||
endif
|
||||
|
||||
write (*, 5) niter
|
||||
5 format(' Iterations = ', 12x, i12)
|
||||
|
||||
write (*, 6) t
|
||||
6 format(' Time in seconds = ',12x, f12.2)
|
||||
|
||||
write (*,7) num_threads
|
||||
7 format(' Total threads = ', 12x, i12)
|
||||
|
||||
write (*,8) max_threads
|
||||
8 format(' Avail threads = ', 12x, i12)
|
||||
|
||||
if (num_threads .ne. max_threads) write (*,88)
|
||||
88 format(' Warning: Threads used differ from threads available')
|
||||
|
||||
write (*,9) mops
|
||||
9 format(' Mop/s total = ',12x, f12.2)
|
||||
|
||||
write (*,10) mops/float( num_threads )
|
||||
10 format(' Mop/s/thread = ', 12x, f12.2)
|
||||
|
||||
write(*, 11) optype
|
||||
11 format(' Operation type = ', a24)
|
||||
|
||||
if (verified) then
|
||||
write(*,12) ' SUCCESSFUL'
|
||||
else
|
||||
write(*,12) 'UNSUCCESSFUL'
|
||||
endif
|
||||
12 format(' Verification = ', 12x, a)
|
||||
|
||||
write(*,13) npbversion
|
||||
13 format(' Version = ', 12x, a12)
|
||||
|
||||
write(*,14) compiletime
|
||||
14 format(' Compile date = ', 12x, a12)
|
||||
|
||||
|
||||
write (*,121) cs1
|
||||
121 format(/, ' Compile options:', /,
|
||||
> ' F77 = ', A)
|
||||
|
||||
write (*,122) cs2
|
||||
122 format(' FLINK = ', A)
|
||||
|
||||
write (*,123) cs3
|
||||
123 format(' F_LIB = ', A)
|
||||
|
||||
write (*,124) cs4
|
||||
124 format(' F_INC = ', A)
|
||||
|
||||
write (*,125) cs5
|
||||
125 format(' FFLAGS = ', A)
|
||||
|
||||
write (*,126) cs6
|
||||
126 format(' FLINKFLAGS = ', A)
|
||||
|
||||
write(*, 127) cs7
|
||||
127 format(' RAND = ', A)
|
||||
|
||||
write (*,130)
|
||||
130 format(//' Please send all errors/feedbacks to:'//
|
||||
> ' NPB Development Team'/
|
||||
> ' npb@nas.nasa.gov'//)
|
||||
c 130 format(//' Please send the results of this run to:'//
|
||||
c > ' NPB Development Team '/
|
||||
c > ' Internet: npb@nas.nasa.gov'/
|
||||
c > ' '/
|
||||
c > ' If email is not available, send this to:'//
|
||||
c > ' MS T27A-1'/
|
||||
c > ' NASA Ames Research Center'/
|
||||
c > ' Moffett Field, CA 94035-1000'//
|
||||
c > ' Fax: 650-604-3957'//)
|
||||
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,202 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine set_constants
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'header.h'
|
||||
|
||||
ce(1,1) = 2.0d0
|
||||
ce(1,2) = 0.0d0
|
||||
ce(1,3) = 0.0d0
|
||||
ce(1,4) = 4.0d0
|
||||
ce(1,5) = 5.0d0
|
||||
ce(1,6) = 3.0d0
|
||||
ce(1,7) = 0.5d0
|
||||
ce(1,8) = 0.02d0
|
||||
ce(1,9) = 0.01d0
|
||||
ce(1,10) = 0.03d0
|
||||
ce(1,11) = 0.5d0
|
||||
ce(1,12) = 0.4d0
|
||||
ce(1,13) = 0.3d0
|
||||
|
||||
ce(2,1) = 1.0d0
|
||||
ce(2,2) = 0.0d0
|
||||
ce(2,3) = 0.0d0
|
||||
ce(2,4) = 0.0d0
|
||||
ce(2,5) = 1.0d0
|
||||
ce(2,6) = 2.0d0
|
||||
ce(2,7) = 3.0d0
|
||||
ce(2,8) = 0.01d0
|
||||
ce(2,9) = 0.03d0
|
||||
ce(2,10) = 0.02d0
|
||||
ce(2,11) = 0.4d0
|
||||
ce(2,12) = 0.3d0
|
||||
ce(2,13) = 0.5d0
|
||||
|
||||
ce(3,1) = 2.0d0
|
||||
ce(3,2) = 2.0d0
|
||||
ce(3,3) = 0.0d0
|
||||
ce(3,4) = 0.0d0
|
||||
ce(3,5) = 0.0d0
|
||||
ce(3,6) = 2.0d0
|
||||
ce(3,7) = 3.0d0
|
||||
ce(3,8) = 0.04d0
|
||||
ce(3,9) = 0.03d0
|
||||
ce(3,10) = 0.05d0
|
||||
ce(3,11) = 0.3d0
|
||||
ce(3,12) = 0.5d0
|
||||
ce(3,13) = 0.4d0
|
||||
|
||||
ce(4,1) = 2.0d0
|
||||
ce(4,2) = 2.0d0
|
||||
ce(4,3) = 0.0d0
|
||||
ce(4,4) = 0.0d0
|
||||
ce(4,5) = 0.0d0
|
||||
ce(4,6) = 2.0d0
|
||||
ce(4,7) = 3.0d0
|
||||
ce(4,8) = 0.03d0
|
||||
ce(4,9) = 0.05d0
|
||||
ce(4,10) = 0.04d0
|
||||
ce(4,11) = 0.2d0
|
||||
ce(4,12) = 0.1d0
|
||||
ce(4,13) = 0.3d0
|
||||
|
||||
ce(5,1) = 5.0d0
|
||||
ce(5,2) = 4.0d0
|
||||
ce(5,3) = 3.0d0
|
||||
ce(5,4) = 2.0d0
|
||||
ce(5,5) = 0.1d0
|
||||
ce(5,6) = 0.4d0
|
||||
ce(5,7) = 0.3d0
|
||||
ce(5,8) = 0.05d0
|
||||
ce(5,9) = 0.04d0
|
||||
ce(5,10) = 0.03d0
|
||||
ce(5,11) = 0.1d0
|
||||
ce(5,12) = 0.3d0
|
||||
ce(5,13) = 0.2d0
|
||||
|
||||
c1 = 1.4d0
|
||||
c2 = 0.4d0
|
||||
c3 = 0.1d0
|
||||
c4 = 1.0d0
|
||||
c5 = 1.4d0
|
||||
|
||||
bt = dsqrt(0.5d0)
|
||||
|
||||
dnxm1 = 1.0d0 / dble(problem_size-1)
|
||||
dnym1 = 1.0d0 / dble(problem_size-1)
|
||||
dnzm1 = 1.0d0 / dble(problem_size-1)
|
||||
|
||||
c1c2 = c1 * c2
|
||||
c1c5 = c1 * c5
|
||||
c3c4 = c3 * c4
|
||||
c1345 = c1c5 * c3c4
|
||||
|
||||
conz1 = (1.0d0-c1c5)
|
||||
|
||||
tx1 = 1.0d0 / (dnxm1 * dnxm1)
|
||||
tx2 = 1.0d0 / (2.0d0 * dnxm1)
|
||||
tx3 = 1.0d0 / dnxm1
|
||||
|
||||
ty1 = 1.0d0 / (dnym1 * dnym1)
|
||||
ty2 = 1.0d0 / (2.0d0 * dnym1)
|
||||
ty3 = 1.0d0 / dnym1
|
||||
|
||||
tz1 = 1.0d0 / (dnzm1 * dnzm1)
|
||||
tz2 = 1.0d0 / (2.0d0 * dnzm1)
|
||||
tz3 = 1.0d0 / dnzm1
|
||||
|
||||
dx1 = 0.75d0
|
||||
dx2 = 0.75d0
|
||||
dx3 = 0.75d0
|
||||
dx4 = 0.75d0
|
||||
dx5 = 0.75d0
|
||||
|
||||
dy1 = 0.75d0
|
||||
dy2 = 0.75d0
|
||||
dy3 = 0.75d0
|
||||
dy4 = 0.75d0
|
||||
dy5 = 0.75d0
|
||||
|
||||
dz1 = 1.0d0
|
||||
dz2 = 1.0d0
|
||||
dz3 = 1.0d0
|
||||
dz4 = 1.0d0
|
||||
dz5 = 1.0d0
|
||||
|
||||
dxmax = dmax1(dx3, dx4)
|
||||
dymax = dmax1(dy2, dy4)
|
||||
dzmax = dmax1(dz2, dz3)
|
||||
|
||||
dssp = 0.25d0 * dmax1(dx1, dmax1(dy1, dz1) )
|
||||
|
||||
c4dssp = 4.0d0 * dssp
|
||||
c5dssp = 5.0d0 * dssp
|
||||
|
||||
dttx1 = dt*tx1
|
||||
dttx2 = dt*tx2
|
||||
dtty1 = dt*ty1
|
||||
dtty2 = dt*ty2
|
||||
dttz1 = dt*tz1
|
||||
dttz2 = dt*tz2
|
||||
|
||||
c2dttx1 = 2.0d0*dttx1
|
||||
c2dtty1 = 2.0d0*dtty1
|
||||
c2dttz1 = 2.0d0*dttz1
|
||||
|
||||
dtdssp = dt*dssp
|
||||
|
||||
comz1 = dtdssp
|
||||
comz4 = 4.0d0*dtdssp
|
||||
comz5 = 5.0d0*dtdssp
|
||||
comz6 = 6.0d0*dtdssp
|
||||
|
||||
c3c4tx3 = c3c4*tx3
|
||||
c3c4ty3 = c3c4*ty3
|
||||
c3c4tz3 = c3c4*tz3
|
||||
|
||||
dx1tx1 = dx1*tx1
|
||||
dx2tx1 = dx2*tx1
|
||||
dx3tx1 = dx3*tx1
|
||||
dx4tx1 = dx4*tx1
|
||||
dx5tx1 = dx5*tx1
|
||||
|
||||
dy1ty1 = dy1*ty1
|
||||
dy2ty1 = dy2*ty1
|
||||
dy3ty1 = dy3*ty1
|
||||
dy4ty1 = dy4*ty1
|
||||
dy5ty1 = dy5*ty1
|
||||
|
||||
dz1tz1 = dz1*tz1
|
||||
dz2tz1 = dz2*tz1
|
||||
dz3tz1 = dz3*tz1
|
||||
dz4tz1 = dz4*tz1
|
||||
dz5tz1 = dz5*tz1
|
||||
|
||||
c2iv = 2.5d0
|
||||
con43 = 4.0d0/3.0d0
|
||||
con16 = 1.0d0/6.0d0
|
||||
|
||||
xxcon1 = c3c4tx3*con43*tx3
|
||||
xxcon2 = c3c4tx3*tx3
|
||||
xxcon3 = c3c4tx3*conz1*tx3
|
||||
xxcon4 = c3c4tx3*con16*tx3
|
||||
xxcon5 = c3c4tx3*c1c5*tx3
|
||||
|
||||
yycon1 = c3c4ty3*con43*ty3
|
||||
yycon2 = c3c4ty3*ty3
|
||||
yycon3 = c3c4ty3*conz1*ty3
|
||||
yycon4 = c3c4ty3*con16*ty3
|
||||
yycon5 = c3c4ty3*c1c5*ty3
|
||||
|
||||
zzcon1 = c3c4tz3*con43*tz3
|
||||
zzcon2 = c3c4tz3*tz3
|
||||
zzcon3 = c3c4tz3*conz1*tz3
|
||||
zzcon4 = c3c4tz3*con16*tz3
|
||||
zzcon5 = c3c4tz3*c1c5*tz3
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,231 @@
|
||||
!-------------------------------------------------------------------------!
|
||||
! !
|
||||
! N A S P A R A L L E L B E N C H M A R K S 3.3.1 !
|
||||
! !
|
||||
! D V M H V E R S I O N !
|
||||
! !
|
||||
! S P !
|
||||
! !
|
||||
!-------------------------------------------------------------------------!
|
||||
!-------------------------------------------------------------------------!
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c
|
||||
c Authors:
|
||||
c Original:
|
||||
c R. Van der Wijngaart
|
||||
c W. Saphir
|
||||
c H. Jin
|
||||
c Optimize for DVMH:
|
||||
c Kolganov A.S.
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
program SP
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'header.h'
|
||||
|
||||
integer i, niter, step, fstatus, n3
|
||||
external timer_read
|
||||
double precision mflops, t, tmax, timer_read, trecs(t_last)
|
||||
logical verified
|
||||
character class
|
||||
character t_names(t_last)*8
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Read input file (if it exists), else take
|
||||
c defaults from parameters
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
open (unit=2,file='timer.flag',status='old', iostat=fstatus)
|
||||
if (fstatus .eq. 0) then
|
||||
timeron = .true.
|
||||
t_names(t_total) = 'total'
|
||||
t_names(t_rhsx) = 'rhsx'
|
||||
t_names(t_rhsy) = 'rhsy'
|
||||
t_names(t_rhsz) = 'rhsz'
|
||||
t_names(t_rhs) = 'rhs'
|
||||
t_names(t_xsolve) = 'xsolve'
|
||||
t_names(t_ysolve) = 'ysolve'
|
||||
t_names(t_zsolve) = 'zsolve'
|
||||
t_names(t_rdis1) = 'redist1'
|
||||
t_names(t_rdis2) = 'redist2'
|
||||
t_names(t_tzetar) = 'tzetar'
|
||||
t_names(t_ninvr) = 'ninvr'
|
||||
t_names(t_pinvr) = 'pinvr'
|
||||
t_names(t_txinvr) = 'txinvr'
|
||||
t_names(t_add) = 'add'
|
||||
close(2)
|
||||
else
|
||||
timeron = .false.
|
||||
endif
|
||||
|
||||
write(*, 1000)
|
||||
open (unit=2,file='inputsp.data',status='old', iostat=fstatus)
|
||||
|
||||
if (fstatus .eq. 0) then
|
||||
write(*,233)
|
||||
233 format(' Reading from input file inputsp.data')
|
||||
read (2,*) niter
|
||||
read (2,*) dt
|
||||
read (2,*) grid_points(1), grid_points(2), grid_points(3)
|
||||
close(2)
|
||||
else
|
||||
write(*,234)
|
||||
niter = niter_default
|
||||
dt = dt_default
|
||||
grid_points(1) = problem_size
|
||||
grid_points(2) = problem_size
|
||||
grid_points(3) = problem_size
|
||||
endif
|
||||
234 format(' No input file inputsp.data. Using compiled defaults')
|
||||
open (unit = 2,file = 'inputStage',status = 'old',iostat = fstat
|
||||
&us)
|
||||
if (fstatus .eq. 0) then
|
||||
read (unit = 2,fmt = *) stage_n
|
||||
close (unit = 2)
|
||||
else
|
||||
stage_n = 0
|
||||
endif
|
||||
write(*,*) 'stage = ', stage_n
|
||||
|
||||
write(*, 1001) problem_size, problem_size, problem_size
|
||||
write(*, 1002) niter, dt
|
||||
write(*, *)
|
||||
|
||||
1000 format(//, ' NAS Parallel Benchmarks (NPB3.3.1-DVMH)',
|
||||
> ' - SP Benchmark', /)
|
||||
1001 format(' Size: ', i4, 'x', i4, 'x', i4)
|
||||
1002 format(' Iterations: ', i4, ' dt: ', F11.7)
|
||||
1003 format(' Number of available threads: ', i5)
|
||||
|
||||
if ( (problem_size .gt. IMAX) .or.
|
||||
> (problem_size .gt. JMAX) .or.
|
||||
> (problem_size .gt. KMAX) ) then
|
||||
print *, (grid_points(i),i=1,3)
|
||||
print *,' Problem size too big for compiled array sizes'
|
||||
goto 999
|
||||
endif
|
||||
nx2 = problem_size - 2
|
||||
ny2 = problem_size - 2
|
||||
nz2 = problem_size - 2
|
||||
|
||||
call set_constants
|
||||
call exact_rhs
|
||||
|
||||
call initialize
|
||||
call adi_first
|
||||
call adi_first
|
||||
call initialize
|
||||
|
||||
do i = 1, t_last
|
||||
call timer_clear(i)
|
||||
end do
|
||||
call timer_start(1)
|
||||
!DVM$ BARRIER
|
||||
do step = 1, niter
|
||||
|
||||
if (mod(step, 20) .eq. 0 .or. step .eq. 1) then
|
||||
write(*, 200) step
|
||||
200 format(' Time step ', i4)
|
||||
endif
|
||||
|
||||
call adi
|
||||
|
||||
end do
|
||||
call timer_stop(1)
|
||||
tmax = timer_read(1)
|
||||
|
||||
call verify(niter, class, verified)
|
||||
|
||||
if( tmax .ne. 0. ) then
|
||||
n3 = problem_size*problem_size*problem_size
|
||||
t = (problem_size+problem_size+problem_size)/3.0
|
||||
mflops = (881.174 * float( n3 )
|
||||
> -4683.91 * t**2
|
||||
> +11484.5 * t
|
||||
> -19272.4) * float( niter ) / (tmax*1000000.0d0)
|
||||
else
|
||||
mflops = 0.0
|
||||
endif
|
||||
|
||||
call print_results('SP', class, problem_size,
|
||||
> problem_size, problem_size, niter,
|
||||
> tmax, mflops, ' floating point',
|
||||
> verified, npbversion,compiletime, cs1, cs2, cs3, cs4, cs5,
|
||||
> cs6, '(none)')
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c More timers
|
||||
c---------------------------------------------------------------------
|
||||
if (.not.timeron) goto 999
|
||||
|
||||
do i=1, t_last
|
||||
trecs(i) = timer_read(i)
|
||||
end do
|
||||
if (tmax .eq. 0.0) tmax = 1.0
|
||||
|
||||
write(*,800)
|
||||
800 format(' SECTION Time (secs)')
|
||||
|
||||
do i=1, t_last
|
||||
write(*,810) t_names(i), trecs(i), trecs(i)*100./tmax
|
||||
if (i.eq.t_rhs) then
|
||||
t = trecs(t_rhsx) + trecs(t_rhsy) + trecs(t_rhsz)
|
||||
write(*,820) 'sub-rhs', t, t*100./tmax
|
||||
t = trecs(t_rhs) - t
|
||||
write(*,820) 'rest-rhs', t, t*100./tmax
|
||||
elseif (i.eq.t_zsolve) then
|
||||
t = trecs(t_zsolve) - trecs(t_rdis1) - trecs(t_rdis2)
|
||||
write(*,820) 'sub-zsol', t, t*100./tmax
|
||||
elseif (i.eq.t_rdis2) then
|
||||
t = trecs(t_rdis1) + trecs(t_rdis2)
|
||||
write(*,820) 'redist', t, t*100./tmax
|
||||
endif
|
||||
810 format(2x,a8,':',f9.3,' (',f6.2,'%)')
|
||||
820 format(' --> ',a8,':',f9.3,' (',f6.2,'%)')
|
||||
end do
|
||||
|
||||
999 continue
|
||||
|
||||
end
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine adi_first
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
call compute_rhs(1)
|
||||
call x_solve
|
||||
call y_solve
|
||||
call z_solve
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine adi
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
!DVM$ interval 1
|
||||
call compute_rhs(1)
|
||||
!DVM$ end interval
|
||||
!DVM$ interval 12
|
||||
call x_solve
|
||||
!DVM$ end interval
|
||||
!DVM$ interval 13
|
||||
call y_solve
|
||||
!DVM$ end interval
|
||||
!DVM$ interval 14
|
||||
call z_solve
|
||||
!DVM$ end interval
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,99 @@
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine timer_clear(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
integer n
|
||||
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
|
||||
elapsed(n) = 0.0
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine timer_start(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
external elapsed_time
|
||||
double precision elapsed_time
|
||||
integer n
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
|
||||
start(n) = elapsed_time()
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine timer_stop(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
external elapsed_time
|
||||
double precision elapsed_time
|
||||
integer n
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
|
||||
double precision t, now
|
||||
now = elapsed_time()
|
||||
t = now - start(n)
|
||||
elapsed(n) = elapsed(n) + t
|
||||
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
double precision function timer_read(n)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
integer n
|
||||
double precision start(64), elapsed(64)
|
||||
common /tt/ start, elapsed
|
||||
|
||||
timer_read = elapsed(n)
|
||||
return
|
||||
end
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
double precision function elapsed_time()
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
implicit none
|
||||
|
||||
double precision t,dvtime
|
||||
t = dvtime()
|
||||
elapsed_time = t
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,356 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine verify(no_time_steps, class, verified)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c verification routine
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'header.h'
|
||||
|
||||
double precision xcrref(5),xceref(5),xcrdif(5),xcedif(5),
|
||||
> epsilon, xce(5), xcr(5), dtref
|
||||
integer m, no_time_steps
|
||||
character class
|
||||
logical verified
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c tolerance level
|
||||
c---------------------------------------------------------------------
|
||||
epsilon = 1.0d-08
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c compute the error norm and the residual norm, and exit if not printing
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
call error_norm(xce)
|
||||
call compute_rhs(0)
|
||||
call rhs_norm(xcr)
|
||||
|
||||
do m = 1, 5
|
||||
xcr(m) = xcr(m) / dt
|
||||
enddo
|
||||
|
||||
class = 'U'
|
||||
verified = .true.
|
||||
|
||||
do m = 1,5
|
||||
xcrref(m) = 1.0
|
||||
xceref(m) = 1.0
|
||||
end do
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c reference data for 12X12X12 grids after 100 time steps, with DT = 1.50d-02
|
||||
c---------------------------------------------------------------------
|
||||
if ( (problem_size .eq. 12 ) .and.
|
||||
> (problem_size .eq. 12 ) .and.
|
||||
> (problem_size .eq. 12 ) .and.
|
||||
> (no_time_steps .eq. 100 )) then
|
||||
|
||||
class = 'S'
|
||||
dtref = 1.5d-2
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Reference values of RMS-norms of residual.
|
||||
c---------------------------------------------------------------------
|
||||
xcrref(1) = 2.7470315451339479d-02
|
||||
xcrref(2) = 1.0360746705285417d-02
|
||||
xcrref(3) = 1.6235745065095532d-02
|
||||
xcrref(4) = 1.5840557224455615d-02
|
||||
xcrref(5) = 3.4849040609362460d-02
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Reference values of RMS-norms of solution error.
|
||||
c---------------------------------------------------------------------
|
||||
xceref(1) = 2.7289258557377227d-05
|
||||
xceref(2) = 1.0364446640837285d-05
|
||||
xceref(3) = 1.6154798287166471d-05
|
||||
xceref(4) = 1.5750704994480102d-05
|
||||
xceref(5) = 3.4177666183390531d-05
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c reference data for 36X36X36 grids after 400 time steps, with DT = 1.5d-03
|
||||
c---------------------------------------------------------------------
|
||||
elseif ( (problem_size .eq. 36) .and.
|
||||
> (problem_size .eq. 36) .and.
|
||||
> (problem_size .eq. 36) .and.
|
||||
> (no_time_steps . eq. 400) ) then
|
||||
|
||||
class = 'W'
|
||||
dtref = 1.5d-3
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Reference values of RMS-norms of residual.
|
||||
c---------------------------------------------------------------------
|
||||
xcrref(1) = 0.1893253733584d-02
|
||||
xcrref(2) = 0.1717075447775d-03
|
||||
xcrref(3) = 0.2778153350936d-03
|
||||
xcrref(4) = 0.2887475409984d-03
|
||||
xcrref(5) = 0.3143611161242d-02
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Reference values of RMS-norms of solution error.
|
||||
c---------------------------------------------------------------------
|
||||
xceref(1) = 0.7542088599534d-04
|
||||
xceref(2) = 0.6512852253086d-05
|
||||
xceref(3) = 0.1049092285688d-04
|
||||
xceref(4) = 0.1128838671535d-04
|
||||
xceref(5) = 0.1212845639773d-03
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c reference data for 64X64X64 grids after 400 time steps, with DT = 1.5d-03
|
||||
c---------------------------------------------------------------------
|
||||
elseif ( (problem_size .eq. 64) .and.
|
||||
> (problem_size .eq. 64) .and.
|
||||
> (problem_size .eq. 64) .and.
|
||||
> (no_time_steps . eq. 400) ) then
|
||||
|
||||
class = 'A'
|
||||
dtref = 1.5d-3
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Reference values of RMS-norms of residual.
|
||||
c---------------------------------------------------------------------
|
||||
xcrref(1) = 2.4799822399300195d0
|
||||
xcrref(2) = 1.1276337964368832d0
|
||||
xcrref(3) = 1.5028977888770491d0
|
||||
xcrref(4) = 1.4217816211695179d0
|
||||
xcrref(5) = 2.1292113035138280d0
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Reference values of RMS-norms of solution error.
|
||||
c---------------------------------------------------------------------
|
||||
xceref(1) = 1.0900140297820550d-04
|
||||
xceref(2) = 3.7343951769282091d-05
|
||||
xceref(3) = 5.0092785406541633d-05
|
||||
xceref(4) = 4.7671093939528255d-05
|
||||
xceref(5) = 1.3621613399213001d-04
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c reference data for 102X102X102 grids after 400 time steps,
|
||||
c with DT = 1.0d-03
|
||||
c---------------------------------------------------------------------
|
||||
elseif ( (problem_size .eq. 102) .and.
|
||||
> (problem_size .eq. 102) .and.
|
||||
> (problem_size .eq. 102) .and.
|
||||
> (no_time_steps . eq. 400) ) then
|
||||
|
||||
class = 'B'
|
||||
dtref = 1.0d-3
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Reference values of RMS-norms of residual.
|
||||
c---------------------------------------------------------------------
|
||||
xcrref(1) = 0.6903293579998d+02
|
||||
xcrref(2) = 0.3095134488084d+02
|
||||
xcrref(3) = 0.4103336647017d+02
|
||||
xcrref(4) = 0.3864769009604d+02
|
||||
xcrref(5) = 0.5643482272596d+02
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Reference values of RMS-norms of solution error.
|
||||
c---------------------------------------------------------------------
|
||||
xceref(1) = 0.9810006190188d-02
|
||||
xceref(2) = 0.1022827905670d-02
|
||||
xceref(3) = 0.1720597911692d-02
|
||||
xceref(4) = 0.1694479428231d-02
|
||||
xceref(5) = 0.1847456263981d-01
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c reference data for 162X162X162 grids after 400 time steps,
|
||||
c with DT = 0.67d-03
|
||||
c---------------------------------------------------------------------
|
||||
elseif ( (problem_size .eq. 162) .and.
|
||||
> (problem_size .eq. 162) .and.
|
||||
> (problem_size .eq. 162) .and.
|
||||
> (no_time_steps . eq. 400) ) then
|
||||
|
||||
class = 'C'
|
||||
dtref = 0.67d-3
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Reference values of RMS-norms of residual.
|
||||
c---------------------------------------------------------------------
|
||||
xcrref(1) = 0.5881691581829d+03
|
||||
xcrref(2) = 0.2454417603569d+03
|
||||
xcrref(3) = 0.3293829191851d+03
|
||||
xcrref(4) = 0.3081924971891d+03
|
||||
xcrref(5) = 0.4597223799176d+03
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Reference values of RMS-norms of solution error.
|
||||
c---------------------------------------------------------------------
|
||||
xceref(1) = 0.2598120500183d+00
|
||||
xceref(2) = 0.2590888922315d-01
|
||||
xceref(3) = 0.5132886416320d-01
|
||||
xceref(4) = 0.4806073419454d-01
|
||||
xceref(5) = 0.5483377491301d+00
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c reference data for 408X408X408 grids after 500 time steps,
|
||||
c with DT = 0.3d-03
|
||||
c---------------------------------------------------------------------
|
||||
elseif ( (problem_size .eq. 408) .and.
|
||||
> (problem_size .eq. 408) .and.
|
||||
> (problem_size .eq. 408) .and.
|
||||
> (no_time_steps . eq. 500) ) then
|
||||
|
||||
class = 'D'
|
||||
dtref = 0.30d-3
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Reference values of RMS-norms of residual.
|
||||
c---------------------------------------------------------------------
|
||||
xcrref(1) = 0.1044696216887d+05
|
||||
xcrref(2) = 0.3204427762578d+04
|
||||
xcrref(3) = 0.4648680733032d+04
|
||||
xcrref(4) = 0.4238923283697d+04
|
||||
xcrref(5) = 0.7588412036136d+04
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Reference values of RMS-norms of solution error.
|
||||
c---------------------------------------------------------------------
|
||||
xceref(1) = 0.5089471423669d+01
|
||||
xceref(2) = 0.5323514855894d+00
|
||||
xceref(3) = 0.1187051008971d+01
|
||||
xceref(4) = 0.1083734951938d+01
|
||||
xceref(5) = 0.1164108338568d+02
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c reference data for 1020X1020X1020 grids after 500 time steps,
|
||||
c with DT = 0.1d-03
|
||||
c---------------------------------------------------------------------
|
||||
elseif ( (problem_size .eq. 1020) .and.
|
||||
> (problem_size .eq. 1020) .and.
|
||||
> (problem_size .eq. 1020) .and.
|
||||
> (no_time_steps . eq. 500) ) then
|
||||
|
||||
class = 'E'
|
||||
dtref = 0.10d-3
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Reference values of RMS-norms of residual.
|
||||
c---------------------------------------------------------------------
|
||||
xcrref(1) = 0.6255387422609d+05
|
||||
xcrref(2) = 0.1495317020012d+05
|
||||
xcrref(3) = 0.2347595750586d+05
|
||||
xcrref(4) = 0.2091099783534d+05
|
||||
xcrref(5) = 0.4770412841218d+05
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Reference values of RMS-norms of solution error.
|
||||
c---------------------------------------------------------------------
|
||||
xceref(1) = 0.6742735164909d+02
|
||||
xceref(2) = 0.5390656036938d+01
|
||||
xceref(3) = 0.1680647196477d+02
|
||||
xceref(4) = 0.1536963126457d+02
|
||||
xceref(5) = 0.1575330146156d+03
|
||||
|
||||
|
||||
else
|
||||
verified = .false.
|
||||
endif
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c verification test for residuals if gridsize is one of
|
||||
c the defined grid sizes above (class .ne. 'U')
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Compute the difference of solution values and the known reference values.
|
||||
c---------------------------------------------------------------------
|
||||
do m = 1, 5
|
||||
|
||||
xcrdif(m) = dabs((xcr(m)-xcrref(m))/xcrref(m))
|
||||
xcedif(m) = dabs((xce(m)-xceref(m))/xceref(m))
|
||||
|
||||
enddo
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Output the comparison of computed results to known cases.
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
if (class .ne. 'U') then
|
||||
write(*, 1990) class
|
||||
1990 format(' Verification being performed for class ', a)
|
||||
write (*,2000) epsilon
|
||||
2000 format(' accuracy setting for epsilon = ', E20.13)
|
||||
verified = (dabs(dt-dtref) .le. epsilon)
|
||||
if (.not.verified) then
|
||||
class = 'U'
|
||||
write (*,1000) dtref
|
||||
1000 format(' DT does not match the reference value of ',
|
||||
> E15.8)
|
||||
endif
|
||||
else
|
||||
write(*, 1995)
|
||||
1995 format(' Unknown class')
|
||||
endif
|
||||
|
||||
|
||||
if (class .ne. 'U') then
|
||||
write (*, 2001)
|
||||
else
|
||||
write (*, 2005)
|
||||
endif
|
||||
|
||||
2001 format(' Comparison of RMS-norms of residual')
|
||||
2005 format(' RMS-norms of residual')
|
||||
do m = 1, 5
|
||||
if (class .eq. 'U') then
|
||||
write(*, 2015) m, xcr(m)
|
||||
else if (xcrdif(m) .le. epsilon .and.
|
||||
& (.not. isnan(xcrdif(m)))) then
|
||||
write (*,2011) m,xcr(m),xcrref(m),xcrdif(m)
|
||||
else
|
||||
verified = .false.
|
||||
write (*,2010) m,xcr(m),xcrref(m),xcrdif(m)
|
||||
endif
|
||||
enddo
|
||||
|
||||
if (class .ne. 'U') then
|
||||
write (*,2002)
|
||||
else
|
||||
write (*,2006)
|
||||
endif
|
||||
2002 format(' Comparison of RMS-norms of solution error')
|
||||
2006 format(' RMS-norms of solution error')
|
||||
|
||||
do m = 1, 5
|
||||
if (class .eq. 'U') then
|
||||
write(*, 2015) m, xce(m)
|
||||
else if (xcedif(m) .le. epsilon .and.
|
||||
& (.not. isnan(xcedif(m)))) then
|
||||
write (*,2011) m,xce(m),xceref(m),xcedif(m)
|
||||
else
|
||||
verified = .false.
|
||||
write (*,2010) m,xce(m),xceref(m),xcedif(m)
|
||||
endif
|
||||
enddo
|
||||
|
||||
2010 format(' FAILURE: ', i2, E20.13, E20.13, E20.13)
|
||||
2011 format(' ', i2, E20.13, E20.13, E20.13)
|
||||
2015 format(' ', i2, E20.13)
|
||||
|
||||
if (class .eq. 'U') then
|
||||
write(*, 2022)
|
||||
write(*, 2023)
|
||||
2022 format(' No reference values provided')
|
||||
2023 format(' No verification performed')
|
||||
else if (verified) then
|
||||
write(*, 2020)
|
||||
2020 format(' Verification Successful')
|
||||
else
|
||||
write(*, 2021)
|
||||
2021 format(' Verification failed')
|
||||
endif
|
||||
|
||||
return
|
||||
|
||||
|
||||
end
|
||||
@@ -0,0 +1,392 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine x_solve
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c this function performs the solution of the approximate factorization
|
||||
c step in the x-direction for all five matrix components
|
||||
c simultaneously. The Thomas algorithm is employed to solve the
|
||||
c systems for the x-lines. Boundary conditions are non-periodic
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'header.h'
|
||||
|
||||
integer i, j, k, i1, i2, m, m1
|
||||
double precision ru1, fac1, fac2, rhs__(5,0:2),t1,t2
|
||||
double precision lhs__(5,0:2), lhsm__(5,0:2), lhsp__(5,0:2)
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
if (timeron) call timer_start(t_xsolve)
|
||||
|
||||
!DVM$ region local(lhs)
|
||||
!DVM$ parallel (k,j) on u(*,*,j,k)
|
||||
!DVM$& , CUDA_BLOCK(32,4)
|
||||
!DVM$& ,private(m,i,ru1,i1,i2,fac1,fac2,lhs__, lhsp__, lhsm__, rhs__,
|
||||
!DVM$& t1,t2)
|
||||
do k = 1, nz2
|
||||
do j = 1, ny2
|
||||
do i = 0, problem_size-1
|
||||
|
||||
if(i .eq. 0) then
|
||||
lhs__(1,0) = 0.0d0
|
||||
lhsp__(1,0) = 0.0d0
|
||||
lhsm__(1,0) = 0.0d0
|
||||
|
||||
lhs__(2,0) = 0.0d0
|
||||
lhsp__(2,0) = 0.0d0
|
||||
lhsm__(2,0) = 0.0d0
|
||||
|
||||
lhs__(3,0) = 1.0d0
|
||||
lhsp__(3,0) = 1.0d0
|
||||
lhsm__(3,0) = 1.0d0
|
||||
|
||||
lhs__(4,0) = 0.0d0
|
||||
lhsp__(4,0) = 0.0d0
|
||||
lhsm__(4,0) = 0.0d0
|
||||
|
||||
lhs__(5,0) = 0.0d0
|
||||
lhsp__(5,0) = 0.0d0
|
||||
lhsm__(5,0) = 0.0d0
|
||||
|
||||
lhs__(1,1) = 0.0d0
|
||||
ru1 = c3c4*1.0d0/u(1,1-1,j,k)
|
||||
ru1 = dmax1(dx2+con43*ru1,
|
||||
> dx5+c1c5*ru1,
|
||||
> dxmax+ru1,
|
||||
> dx1)
|
||||
lhs__(2,1) = - dttx2 * us(1-1,j,k) - dttx1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,1,j,k)
|
||||
ru1 = dmax1(dx2+con43*ru1,
|
||||
> dx5+c1c5*ru1,
|
||||
> dxmax+ru1,
|
||||
> dx1)
|
||||
lhs__(3,1) = 1.0d0 + c2dttx1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,1+1,j,k)
|
||||
ru1 = dmax1(dx2+con43*ru1,
|
||||
> dx5+c1c5*ru1,
|
||||
> dxmax+ru1,
|
||||
> dx1)
|
||||
lhs__(4,1) = dttx2 * us(1+1,j,k) - dttx1 * ru1
|
||||
lhs__(5,1) = 0.0d0
|
||||
|
||||
lhs__(3,1) = lhs__(3,1) + comz5
|
||||
lhs__(4,1) = lhs__(4,1) - comz4
|
||||
lhs__(5,1) = lhs__(5,1) + comz1
|
||||
|
||||
lhsp__(1,1) = lhs__(1,1)
|
||||
lhsp__(2,1) = lhs__(2,1) - dttx2 * speed(1-1,j,k)
|
||||
lhsp__(3,1) = lhs__(3,1)
|
||||
lhsp__(4,1) = lhs__(4,1) + dttx2 * speed(1+1,j,k)
|
||||
lhsp__(5,1) = lhs__(5,1)
|
||||
lhsm__(1,1) = lhs__(1,1)
|
||||
lhsm__(2,1) = lhs__(2,1) + dttx2 * speed(1-1,j,k)
|
||||
lhsm__(3,1) = lhs__(3,1)
|
||||
lhsm__(4,1) = lhs__(4,1) - dttx2 * speed(1+1,j,k)
|
||||
lhsm__(5,1) = lhs__(5,1)
|
||||
endif
|
||||
|
||||
if(i + 2 .lt. problem_size-1) then
|
||||
m = i + 2
|
||||
lhs__(1,2) = 0.0d0
|
||||
|
||||
ru1 = c3c4*1.0d0/u(1,m-1,j,k)
|
||||
ru1 = dmax1(dx2+con43*ru1,
|
||||
> dx5+c1c5*ru1,
|
||||
> dxmax+ru1,
|
||||
> dx1)
|
||||
lhs__(2,2) = - dttx2 * us(m-1,j,k) - dttx1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,m,j,k)
|
||||
ru1 = dmax1(dx2+con43*ru1,
|
||||
> dx5+c1c5*ru1,
|
||||
> dxmax+ru1,
|
||||
> dx1)
|
||||
lhs__(3,2) = 1.0d0 + c2dttx1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,m+1,j,k)
|
||||
ru1 = dmax1(dx2+con43*ru1,
|
||||
> dx5+c1c5*ru1,
|
||||
> dxmax+ru1,
|
||||
> dx1)
|
||||
lhs__(4,2) = dttx2 * us(m+1,j,k) - dttx1 * ru1
|
||||
lhs__(5,2) = 0.0d0
|
||||
|
||||
if(m .eq. 1) then
|
||||
lhs__(3,2) = lhs__(3,2) + comz5
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .eq. 2) then
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .ge. 3 .and. m .le. nx2-2) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .eq. nx2-1) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
else if(m .eq. nx2) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz5
|
||||
endif
|
||||
|
||||
lhsp__(1,2) = lhs__(1,2)
|
||||
lhsp__(2,2) = lhs__(2,2) - dttx2 * speed(m-1,j,k)
|
||||
lhsp__(3,2) = lhs__(3,2)
|
||||
lhsp__(4,2) = lhs__(4,2) + dttx2 * speed(m+1,j,k)
|
||||
lhsp__(5,2) = lhs__(5,2)
|
||||
lhsm__(1,2) = lhs__(1,2)
|
||||
lhsm__(2,2) = lhs__(2,2) + dttx2 * speed(m-1,j,k)
|
||||
lhsm__(3,2) = lhs__(3,2)
|
||||
lhsm__(4,2) = lhs__(4,2) - dttx2 * speed(m+1,j,k)
|
||||
lhsm__(5,2) = lhs__(5,2)
|
||||
else if(i + 2 .eq. nx2+1) then
|
||||
lhs__(1,2) = 0.0d0
|
||||
lhsp__(1,2) = 0.0d0
|
||||
lhsm__(1,2) = 0.0d0
|
||||
|
||||
lhs__(2,2) = 0.0d0
|
||||
lhsp__(2,2) = 0.0d0
|
||||
lhsm__(2,2) = 0.0d0
|
||||
|
||||
lhs__(3,2) = 1.0d0
|
||||
lhsp__(3,2) = 1.0d0
|
||||
lhsm__(3,2) = 1.0d0
|
||||
|
||||
lhs__(4,2) = 0.0d0
|
||||
lhsp__(4,2) = 0.0d0
|
||||
lhsm__(4,2) = 0.0d0
|
||||
|
||||
lhs__(5,2) = 0.0d0
|
||||
lhsp__(5,2) = 0.0d0
|
||||
lhsm__(5,2) = 0.0d0
|
||||
endif
|
||||
!********************************** end of init
|
||||
|
||||
i1 = i + 1
|
||||
i2 = i + 2
|
||||
fac1 = 1.d0/lhs__(3,0)
|
||||
lhs__(4,0) = fac1*lhs__(4,0)
|
||||
lhs__(5,0) = fac1*lhs__(5,0)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
end do
|
||||
|
||||
if(i .le. nx2-1) then
|
||||
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
|
||||
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
|
||||
lhs__(2,2)=lhs__(2,2)-lhs__(1,2)*lhs__(4,0)
|
||||
lhs__(3,2)=lhs__(3,2)-lhs__(1,2)*lhs__(5,0)
|
||||
do m = 1, 3
|
||||
rhs(m,i1,j,k)=rhs(m,i1,j,k)-lhs__(2,1)*rhs(m,i,j,k)
|
||||
rhs(m,i2,j,k)=rhs(m,i2,j,k)-lhs__(1,2)*rhs(m,i,j,k)
|
||||
end do
|
||||
|
||||
else
|
||||
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
|
||||
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
|
||||
fac2 = 1.d0/lhs__(3,1)
|
||||
do m = 1, 3
|
||||
rhs(m,i1,j,k) = rhs(m,i1,j,k)-lhs__(2,1)*rhs(m,i,j,k)
|
||||
rhs(m,i1,j,k) = fac2*rhs(m,i1,j,k)
|
||||
end do
|
||||
endif
|
||||
|
||||
m = 4
|
||||
fac1 = 1.d0/lhsp__(3,0)
|
||||
lhsp__(4,0) = fac1*lhsp__(4,0)
|
||||
lhsp__(5,0) = fac1*lhsp__(5,0)
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
lhsp__(3,1)=lhsp__(3,1)-lhsp__(2,1)*lhsp__(4,0)
|
||||
lhsp__(4,1)=lhsp__(4,1)-lhsp__(2,1)*lhsp__(5,0)
|
||||
rhs(m,i1,j,k) = rhs(m,i1,j,k)-lhsp__(2,1)*rhs(m,i,j,k)
|
||||
if(i .lt. nx2) then
|
||||
lhsp__(2,2)=lhsp__(2,2)-lhsp__(1,2)*lhsp__(4,0)
|
||||
lhsp__(3,2)=lhsp__(3,2)-lhsp__(1,2)*lhsp__(5,0)
|
||||
rhs(m,i2,j,k)=rhs(m,i2,j,k)-lhsp__(1,2)*rhs(m,i,j,k)
|
||||
endif
|
||||
m = 5
|
||||
fac1 = 1.d0/lhsm__(3,0)
|
||||
lhsm__(4,0) = fac1*lhsm__(4,0)
|
||||
lhsm__(5,0) = fac1*lhsm__(5,0)
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
lhsm__(3,1)=lhsm__(3,1)-lhsm__(2,1)*lhsm__(4,0)
|
||||
lhsm__(4,1)=lhsm__(4,1)-lhsm__(2,1)*lhsm__(5,0)
|
||||
rhs(m,i1,j,k)=rhs(m,i1,j,k) -lhsm__(2,1)*rhs(m,i,j,k)
|
||||
if(i .lt. nx2) then
|
||||
lhsm__(2,2)=lhsm__(2,2)-lhsm__(1,2)*lhsm__(4,0)
|
||||
lhsm__(3,2)=lhsm__(3,2)-lhsm__(1,2)*lhsm__(5,0)
|
||||
rhs(m,i2,j,k)=rhs(m,i2,j,k) -lhsm__(1,2)*rhs(m,i,j,k)
|
||||
endif
|
||||
|
||||
if(i .eq. nx2) then
|
||||
rhs(4,i1,j,k) = rhs(4,i1,j,k)/lhsp__(3,1)
|
||||
rhs(5,i1,j,k) = rhs(5,i1,j,k)/lhsm__(3,1)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k) = rhs(m,i,j,k)-lhs__(4,0)*rhs(m,i1,j,k)
|
||||
end do
|
||||
rhs(4,i,j,k) = rhs(4,i,j,k)-lhsp__(4,0)*rhs(4,i1,j,k)
|
||||
rhs(5,i,j,k) = rhs(5,i,j,k)-lhsm__(4,0)*rhs(5,i1,j,k)
|
||||
endif
|
||||
|
||||
|
||||
lhs(0,4,i,j,k) = lhs__(4,0)
|
||||
lhs(1,4,i,j,k) = lhsp__(4,0)
|
||||
lhs(2,4,i,j,k) = lhsm__(4,0)
|
||||
|
||||
lhs(0,5,i,j,k) = lhs__(5,0)
|
||||
lhs(1,5,i,j,k) = lhsp__(5,0)
|
||||
lhs(2,5,i,j,k) = lhsm__(5,0)
|
||||
|
||||
lhs__(1,0) = lhs__(1,1)
|
||||
lhsp__(1,0) = lhsp__(1,1)
|
||||
lhsm__(1,0) = lhsm__(1,1)
|
||||
lhs__(1,1) = lhs__(1,2)
|
||||
lhsp__(1,1) = lhsp__(1,2)
|
||||
lhsm__(1,1) = lhsm__(1,2)
|
||||
|
||||
lhs__(2,0) = lhs__(2,1)
|
||||
lhsp__(2,0) = lhsp__(2,1)
|
||||
lhsm__(2,0) = lhsm__(2,1)
|
||||
lhs__(2,1) = lhs__(2,2)
|
||||
lhsp__(2,1) = lhsp__(2,2)
|
||||
lhsm__(2,1) = lhsm__(2,2)
|
||||
|
||||
lhs__(3,0) = lhs__(3,1)
|
||||
lhsp__(3,0) = lhsp__(3,1)
|
||||
lhsm__(3,0) = lhsm__(3,1)
|
||||
lhs__(3,1) = lhs__(3,2)
|
||||
lhsp__(3,1) = lhsp__(3,2)
|
||||
lhsm__(3,1) = lhsm__(3,2)
|
||||
|
||||
lhs__(4,0) = lhs__(4,1)
|
||||
lhsp__(4,0) = lhsp__(4,1)
|
||||
lhsm__(4,0) = lhsm__(4,1)
|
||||
lhs__(4,1) = lhs__(4,2)
|
||||
lhsp__(4,1) = lhsp__(4,2)
|
||||
lhsm__(4,1) = lhsm__(4,2)
|
||||
|
||||
lhs__(5,0) = lhs__(5,1)
|
||||
lhsp__(5,0) = lhsp__(5,1)
|
||||
lhsm__(5,0) = lhsm__(5,1)
|
||||
lhs__(5,1) = lhs__(5,2)
|
||||
lhsp__(5,1) = lhsp__(5,2)
|
||||
lhsm__(5,1) = lhsm__(5,2)
|
||||
enddo
|
||||
|
||||
i = problem_size-3
|
||||
rhs__(1,2) = rhs(1,i+2,j,k)
|
||||
rhs__(2,2) = rhs(2,i+2,j,k)
|
||||
rhs__(3,2) = rhs(3,i+2,j,k)
|
||||
rhs__(4,2) = rhs(4,i+2,j,k)
|
||||
rhs__(5,2) = rhs(5,i+2,j,k)
|
||||
|
||||
rhs__(1,1) = rhs(1,i+1,j,k)
|
||||
rhs__(2,1) = rhs(2,i+1,j,k)
|
||||
rhs__(3,1) = rhs(3,i+1,j,k)
|
||||
rhs__(4,1) = rhs(4,i+1,j,k)
|
||||
rhs__(5,1) = rhs(5,i+1,j,k)
|
||||
|
||||
rhs__(1,0) = rhs(1,i,j,k)
|
||||
rhs__(2,0) = rhs(2,i,j,k)
|
||||
rhs__(3,0) = rhs(3,i,j,k)
|
||||
rhs__(4,0) = rhs(4,i,j,k)
|
||||
rhs__(5,0) = rhs(5,i,j,k)
|
||||
|
||||
rhs__(1,0) = rhs__(1,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(1,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(1,2)
|
||||
rhs__(2,0) = rhs__(2,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(2,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(2,2)
|
||||
rhs__(3,0) = rhs__(3,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(3,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(3,2)
|
||||
|
||||
rhs__(4,0) = rhs__(4,0) -
|
||||
> lhs(1,4,i,j,k)*rhs__(4,1) -
|
||||
> lhs(1,5,i,j,k)*rhs__(4,2)
|
||||
rhs__(5,0) = rhs__(5,0) -
|
||||
> lhs(2,4,i,j,k)*rhs__(5,1) -
|
||||
> lhs(2,5,i,j,k)*rhs__(5,2)
|
||||
|
||||
rhs__(1,2) = rhs__(1,1)
|
||||
rhs__(2,2) = rhs__(2,1)
|
||||
rhs__(3,2) = rhs__(3,1)
|
||||
rhs__(4,2) = rhs__(4,1)
|
||||
rhs__(5,2) = rhs__(5,1)
|
||||
|
||||
rhs__(1,1) = rhs__(1,0)
|
||||
rhs__(2,1) = rhs__(2,0)
|
||||
rhs__(3,1) = rhs__(3,0)
|
||||
rhs__(4,1) = rhs__(4,0)
|
||||
rhs__(5,1) = rhs__(5,0)
|
||||
|
||||
do i = problem_size-4, 0, -1
|
||||
rhs__(1,0) = rhs(1,i,j,k)
|
||||
rhs__(2,0) = rhs(2,i,j,k)
|
||||
rhs__(3,0) = rhs(3,i,j,k)
|
||||
rhs__(4,0) = rhs(4,i,j,k)
|
||||
rhs__(5,0) = rhs(5,i,j,k)
|
||||
|
||||
rhs__(1,0) = rhs__(1,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(1,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(1,2)
|
||||
rhs__(2,0) = rhs__(2,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(2,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(2,2)
|
||||
rhs__(3,0) = rhs__(3,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(3,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(3,2)
|
||||
|
||||
rhs__(4,0) = rhs__(4,0) -
|
||||
> lhs(1,4,i,j,k)*rhs__(4,1) -
|
||||
> lhs(1,5,i,j,k)*rhs__(4,2)
|
||||
rhs__(5,0) = rhs__(5,0) -
|
||||
> lhs(2,4,i,j,k)*rhs__(5,1) -
|
||||
> lhs(2,5,i,j,k)*rhs__(5,2)
|
||||
|
||||
t1 = bt * rhs__(3,2)
|
||||
t2 = 0.5d0 * ( rhs__(4,2)+rhs__(5,2))
|
||||
rhs(1,i+2,j,k) = -rhs__(2,2)
|
||||
rhs(2,i+2,j,k) = rhs__(1,2)
|
||||
rhs(3,i+2,j,k) = bt * (rhs__(4,2)-rhs__(5,2))
|
||||
rhs(4,i+2,j,k) = -t1 + t2
|
||||
rhs(5,i+2,j,k) = t1 + t2
|
||||
|
||||
rhs__(1,2) = rhs__(1,1)
|
||||
rhs__(2,2) = rhs__(2,1)
|
||||
rhs__(3,2) = rhs__(3,1)
|
||||
rhs__(4,2) = rhs__(4,1)
|
||||
rhs__(5,2) = rhs__(5,1)
|
||||
|
||||
rhs__(1,1) = rhs__(1,0)
|
||||
rhs__(2,1) = rhs__(2,0)
|
||||
rhs__(3,1) = rhs__(3,0)
|
||||
rhs__(4,1) = rhs__(4,0)
|
||||
rhs__(5,1) = rhs__(5,0)
|
||||
end do
|
||||
t1 = bt * rhs__(3,2)
|
||||
t2 = 0.5d0 * ( rhs__(4,2)+rhs__(5,2))
|
||||
rhs(1,1,j,k) = -rhs__(2,2)
|
||||
rhs(2,1,j,k) = rhs__(1,2)
|
||||
rhs(3,1,j,k) = bt * (rhs__(4,2)-rhs__(5,2))
|
||||
rhs(4,1,j,k) = -t1 + t2
|
||||
rhs(5,1,j,k) = t1 + t2
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
if (timeron) call timer_stop(t_xsolve)
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,321 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine x_solve
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c this function performs the solution of the approximate factorization
|
||||
c step in the x-direction for all five matrix components
|
||||
c simultaneously. The Thomas algorithm is employed to solve the
|
||||
c systems for the x-lines. Boundary conditions are non-periodic
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'header.h'
|
||||
|
||||
integer i, j, k, i1, i2, m
|
||||
double precision ru1, fac1, fac2, t1,t2,t3
|
||||
double precision lhs__(5,0:2), lhsm__(5,0:2), lhsp__(5,0:2)
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
if (timeron) call timer_start(t_xsolve)
|
||||
|
||||
!DVM$ region local(lhs)
|
||||
|
||||
!DVM$ parallel (k,j,i) on rhs(*,i,j,k)
|
||||
!DVM$& ,private(m,ru1,i1,i2,fac1,fac2,lhs__, lhsp__, lhsm__)
|
||||
!DVM$& ,ACROSS(OUT:rhs(0:0,0:2,0:0,0:0), lhs(0:0,0:0,0:2,0:0,0:0))
|
||||
!DVM$& ,stage(stage_n)
|
||||
do k = 1, nz2
|
||||
do j = 1, ny2
|
||||
do i = 0, problem_size-1
|
||||
|
||||
if(i .eq. 0) then
|
||||
lhs__(1,0) = 0.0d0
|
||||
lhsp__(1,0) = 0.0d0
|
||||
lhsm__(1,0) = 0.0d0
|
||||
|
||||
lhs__(2,0) = 0.0d0
|
||||
lhsp__(2,0) = 0.0d0
|
||||
lhsm__(2,0) = 0.0d0
|
||||
|
||||
lhs__(3,0) = 1.0d0
|
||||
lhsp__(3,0) = 1.0d0
|
||||
lhsm__(3,0) = 1.0d0
|
||||
|
||||
lhs__(4,0) = 0.0d0
|
||||
lhsp__(4,0) = 0.0d0
|
||||
lhsm__(4,0) = 0.0d0
|
||||
|
||||
lhs__(5,0) = 0.0d0
|
||||
lhsp__(5,0) = 0.0d0
|
||||
lhsm__(5,0) = 0.0d0
|
||||
|
||||
lhs__(1,1) = 0.0d0
|
||||
ru1 = c3c4*1.0d0/u(1,i,j,k)
|
||||
ru1 = dmax1(dx2+con43*ru1,
|
||||
> dx5+c1c5*ru1,
|
||||
> dxmax+ru1,
|
||||
> dx1)
|
||||
lhs__(2,1) = - dttx2 * us(i,j,k) - dttx1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i+1,j,k)
|
||||
ru1 = dmax1(dx2+con43*ru1,
|
||||
> dx5+c1c5*ru1,
|
||||
> dxmax+ru1,
|
||||
> dx1)
|
||||
lhs__(3,1) = 1.0d0 + c2dttx1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i+2,j,k)
|
||||
ru1 = dmax1(dx2+con43*ru1,
|
||||
> dx5+c1c5*ru1,
|
||||
> dxmax+ru1,
|
||||
> dx1)
|
||||
lhs__(4,1) = dttx2 * us(i+2,j,k) - dttx1 * ru1
|
||||
lhs__(5,1) = 0.0d0
|
||||
|
||||
lhs__(3,1) = lhs__(3,1) + comz5
|
||||
lhs__(4,1) = lhs__(4,1) - comz4
|
||||
lhs__(5,1) = lhs__(5,1) + comz1
|
||||
|
||||
lhsp__(1,1) = lhs__(1,1)
|
||||
lhsp__(2,1) = lhs__(2,1) - dttx2 * speed(i,j,k)
|
||||
lhsp__(3,1) = lhs__(3,1)
|
||||
lhsp__(4,1) = lhs__(4,1) + dttx2 * speed(i+2,j,k)
|
||||
lhsp__(5,1) = lhs__(5,1)
|
||||
lhsm__(1,1) = lhs__(1,1)
|
||||
lhsm__(2,1) = lhs__(2,1) + dttx2 * speed(i,j,k)
|
||||
lhsm__(3,1) = lhs__(3,1)
|
||||
lhsm__(4,1) = lhs__(4,1) - dttx2 * speed(i+2,j,k)
|
||||
lhsm__(5,1) = lhs__(5,1)
|
||||
else
|
||||
do m = 1, 5
|
||||
lhs__(m,0) = lhs(0,m,i,j,k)
|
||||
lhsp__(m,0) = lhs(1,m,i,j,k)
|
||||
lhsm__(m,0) = lhs(2,m,i,j,k)
|
||||
|
||||
lhs__(m,1) = lhs(0,m,i+1,j,k)
|
||||
lhsp__(m,1) = lhs(1,m,i+1,j,k)
|
||||
lhsm__(m,1) = lhs(2,m,i+1,j,k)
|
||||
enddo
|
||||
endif
|
||||
|
||||
if(i + 2 .lt. problem_size-1) then
|
||||
m = i + 2
|
||||
lhs__(1,2) = 0.0d0
|
||||
|
||||
ru1 = c3c4*1.0d0/u(1,m-1,j,k)
|
||||
ru1 = dmax1(dx2+con43*ru1,
|
||||
> dx5+c1c5*ru1,
|
||||
> dxmax+ru1,
|
||||
> dx1)
|
||||
lhs__(2,2) = - dttx2 * us(m-1,j,k) - dttx1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,m,j,k)
|
||||
ru1 = dmax1(dx2+con43*ru1,
|
||||
> dx5+c1c5*ru1,
|
||||
> dxmax+ru1,
|
||||
> dx1)
|
||||
lhs__(3,2) = 1.0d0 + c2dttx1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,m+1,j,k)
|
||||
ru1 = dmax1(dx2+con43*ru1,
|
||||
> dx5+c1c5*ru1,
|
||||
> dxmax+ru1,
|
||||
> dx1)
|
||||
lhs__(4,2) = dttx2 * us(m+1,j,k) - dttx1 * ru1
|
||||
lhs__(5,2) = 0.0d0
|
||||
|
||||
if(m .eq. 1) then
|
||||
lhs__(3,2) = lhs__(3,2) + comz5
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .eq. 2) then
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .ge. 3 .and. m .le. nx2-2) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .eq. nx2-1) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
else if(m .eq. nx2) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz5
|
||||
endif
|
||||
|
||||
lhsp__(1,2) = lhs__(1,2)
|
||||
lhsp__(2,2) = lhs__(2,2) - dttx2 * speed(m-1,j,k)
|
||||
lhsp__(3,2) = lhs__(3,2)
|
||||
lhsp__(4,2) = lhs__(4,2) + dttx2 * speed(m+1,j,k)
|
||||
lhsp__(5,2) = lhs__(5,2)
|
||||
lhsm__(1,2) = lhs__(1,2)
|
||||
lhsm__(2,2) = lhs__(2,2) + dttx2 * speed(m-1,j,k)
|
||||
lhsm__(3,2) = lhs__(3,2)
|
||||
lhsm__(4,2) = lhs__(4,2) - dttx2 * speed(m+1,j,k)
|
||||
lhsm__(5,2) = lhs__(5,2)
|
||||
else if(i + 2 .eq. nx2+1) then
|
||||
lhs__(1,2) = 0.0d0
|
||||
lhsp__(1,2) = 0.0d0
|
||||
lhsm__(1,2) = 0.0d0
|
||||
|
||||
lhs__(2,2) = 0.0d0
|
||||
lhsp__(2,2) = 0.0d0
|
||||
lhsm__(2,2) = 0.0d0
|
||||
|
||||
lhs__(3,2) = 1.0d0
|
||||
lhsp__(3,2) = 1.0d0
|
||||
lhsm__(3,2) = 1.0d0
|
||||
|
||||
lhs__(4,2) = 0.0d0
|
||||
lhsp__(4,2) = 0.0d0
|
||||
lhsm__(4,2) = 0.0d0
|
||||
|
||||
lhs__(5,2) = 0.0d0
|
||||
lhsp__(5,2) = 0.0d0
|
||||
lhsm__(5,2) = 0.0d0
|
||||
endif
|
||||
!********************************** end of init
|
||||
|
||||
i1 = i + 1
|
||||
i2 = i + 2
|
||||
fac1 = 1.d0/lhs__(3,0)
|
||||
lhs__(4,0) = fac1*lhs__(4,0)
|
||||
lhs__(5,0) = fac1*lhs__(5,0)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
end do
|
||||
|
||||
if(i .le. nx2-1) then
|
||||
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
|
||||
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
|
||||
lhs__(2,2)=lhs__(2,2)-lhs__(1,2)*lhs__(4,0)
|
||||
lhs__(3,2)=lhs__(3,2)-lhs__(1,2)*lhs__(5,0)
|
||||
do m = 1, 3
|
||||
rhs(m,i1,j,k)=rhs(m,i1,j,k)-lhs__(2,1)*rhs(m,i,j,k)
|
||||
rhs(m,i2,j,k)=rhs(m,i2,j,k)-lhs__(1,2)*rhs(m,i,j,k)
|
||||
end do
|
||||
|
||||
else
|
||||
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
|
||||
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
|
||||
if (lhs__(3,1) .ne. 0) then
|
||||
fac2 = 1.d0/lhs__(3,1)
|
||||
else
|
||||
fac2 = 0
|
||||
endif
|
||||
do m = 1, 3
|
||||
rhs(m,i1,j,k) = rhs(m,i1,j,k)-lhs__(2,1)*rhs(m,i,j,k)
|
||||
rhs(m,i1,j,k) = fac2*rhs(m,i1,j,k)
|
||||
end do
|
||||
endif
|
||||
|
||||
m = 4
|
||||
fac1 = 1.d0/lhsp__(3,0)
|
||||
lhsp__(4,0) = fac1*lhsp__(4,0)
|
||||
lhsp__(5,0) = fac1*lhsp__(5,0)
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
lhsp__(3,1)=lhsp__(3,1)-lhsp__(2,1)*lhsp__(4,0)
|
||||
lhsp__(4,1)=lhsp__(4,1)-lhsp__(2,1)*lhsp__(5,0)
|
||||
rhs(m,i1,j,k) = rhs(m,i1,j,k)-lhsp__(2,1)*rhs(m,i,j,k)
|
||||
if(i .lt. nx2) then
|
||||
lhsp__(2,2)=lhsp__(2,2)-lhsp__(1,2)*lhsp__(4,0)
|
||||
lhsp__(3,2)=lhsp__(3,2)-lhsp__(1,2)*lhsp__(5,0)
|
||||
rhs(m,i2,j,k)=rhs(m,i2,j,k)-lhsp__(1,2)*rhs(m,i,j,k)
|
||||
endif
|
||||
m = 5
|
||||
fac1 = 1.d0/lhsm__(3,0)
|
||||
lhsm__(4,0) = fac1*lhsm__(4,0)
|
||||
lhsm__(5,0) = fac1*lhsm__(5,0)
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
lhsm__(3,1)=lhsm__(3,1)-lhsm__(2,1)*lhsm__(4,0)
|
||||
lhsm__(4,1)=lhsm__(4,1)-lhsm__(2,1)*lhsm__(5,0)
|
||||
rhs(m,i1,j,k)=rhs(m,i1,j,k) -lhsm__(2,1)*rhs(m,i,j,k)
|
||||
if(i .lt. nx2) then
|
||||
lhsm__(2,2)=lhsm__(2,2)-lhsm__(1,2)*lhsm__(4,0)
|
||||
lhsm__(3,2)=lhsm__(3,2)-lhsm__(1,2)*lhsm__(5,0)
|
||||
rhs(m,i2,j,k)=rhs(m,i2,j,k) -lhsm__(1,2)*rhs(m,i,j,k)
|
||||
endif
|
||||
|
||||
if(i .eq. nx2) then
|
||||
rhs(4,i1,j,k) = rhs(4,i1,j,k)/lhsp__(3,1)
|
||||
rhs(5,i1,j,k) = rhs(5,i1,j,k)/lhsm__(3,1)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k) = rhs(m,i,j,k)-lhs__(4,0)*rhs(m,i1,j,k)
|
||||
end do
|
||||
rhs(4,i,j,k) = rhs(4,i,j,k)-lhsp__(4,0)*rhs(4,i1,j,k)
|
||||
rhs(5,i,j,k) = rhs(5,i,j,k)-lhsm__(4,0)*rhs(5,i1,j,k)
|
||||
endif
|
||||
|
||||
do m = 1, 5
|
||||
lhs(0,m,i,j,k) = lhs__(m,0)
|
||||
lhs(1,m,i,j,k) = lhsp__(m,0)
|
||||
lhs(2,m,i,j,k) = lhsm__(m,0)
|
||||
|
||||
lhs(0,m,i+1,j,k) = lhs__(m,1)
|
||||
lhs(1,m,i+1,j,k) = lhsp__(m,1)
|
||||
lhs(2,m,i+1,j,k) = lhsm__(m,1)
|
||||
|
||||
if (i .lt. nx2) then
|
||||
lhs(0,m,i+2,j,k) = lhs__(m,2)
|
||||
lhs(1,m,i+2,j,k) = lhsp__(m,2)
|
||||
lhs(2,m,i+2,j,k) = lhsm__(m,2)
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ PARALLEL (k,j,i) on rhs(*,i,j,k),ACROSS(rhs(0:0,0:2,0:0,0:0))
|
||||
!DVM$& ,stage(stage_n)
|
||||
do k = 1, nz2
|
||||
do j = 1, ny2
|
||||
do i = problem_size-3, 0, -1
|
||||
|
||||
rhs(1,i,j,k) = rhs(1,i,j,k) -
|
||||
& lhs(0,4,i,j,k)*rhs(1,i+1,j,k) -
|
||||
& lhs(0,5,i,j,k)*rhs(1,i+2,j,k)
|
||||
rhs(2,i,j,k) = rhs(2,i,j,k) -
|
||||
& lhs(0,4,i,j,k)*rhs(2,i+1,j,k) -
|
||||
& lhs(0,5,i,j,k)*rhs(2,i+2,j,k)
|
||||
rhs(3,i,j,k) = rhs(3,i,j,k) -
|
||||
& lhs(0,4,i,j,k)*rhs(3,i+1,j,k) -
|
||||
& lhs(0,5,i,j,k)*rhs(3,i+2,j,k)
|
||||
|
||||
rhs(4,i,j,k) = rhs(4,i,j,k) -
|
||||
& lhs(1,4,i,j,k)*rhs(4,i+1,j,k) -
|
||||
& lhs(1,5,i,j,k)*rhs(4,i+2,j,k)
|
||||
rhs(5,i,j,k) = rhs(5,i,j,k) -
|
||||
& lhs(2,4,i,j,k)*rhs(5,i+1,j,k) -
|
||||
& lhs(2,5,i,j,k)*rhs(5,i+2,j,k)
|
||||
end do
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ PARALLEL (k,j,i) on rhs(*,i,j,k),PRIVATE(t1,t2,t3)
|
||||
do k = 1, nz2
|
||||
do j = 1, ny2
|
||||
do i = 1, nx2
|
||||
t1 = bt * rhs(3,i,j,k)
|
||||
t2 = 0.5d0 * (rhs(4,i,j,k)+rhs(5,i,j,k))
|
||||
t3 = rhs(1,i,j,k)
|
||||
|
||||
rhs(1,i,j,k) = -rhs(2,i,j,k)
|
||||
rhs(2,i,j,k) = t3
|
||||
rhs(3,i,j,k) = bt * (rhs(4,i,j,k)-rhs(5,i,j,k))
|
||||
rhs(4,i,j,k) = -t1 + t2
|
||||
rhs(5,i,j,k) = t1 + t2
|
||||
end do
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ end region
|
||||
if (timeron) call timer_stop(t_xsolve)
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,396 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine y_solve
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c this function performs the solution of the approximate factorization
|
||||
c step in the y-direction for all five matrix components
|
||||
c simultaneously. The Thomas algorithm is employed to solve the
|
||||
c systems for the y-lines. Boundary conditions are non-periodic
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'header.h'
|
||||
|
||||
integer i, j, k, j1, j2, m, m1
|
||||
double precision ru1, fac1, fac2, rhs__(5,0:2),t1,t2
|
||||
double precision lhs__(5,0:2), lhsm__(5,0:2), lhsp__(5,0:2)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
if (timeron) call timer_start(t_ysolve)
|
||||
|
||||
!DVM$ region local(lhs)
|
||||
!DVM$ parallel (k,i) on u(*,i,*,k)
|
||||
!DVM$& , CUDA_BLOCK(32,4)
|
||||
!DVM$& ,private(m,j1,j2,fac1,fac2,ru1,lhs__,lhsp__,lhsm__,j,rhs__,
|
||||
!DVM$& t1,t2)
|
||||
do k = 1, nz2
|
||||
do i = 1, nx2
|
||||
|
||||
lhs__(1,0) = 0.0d0
|
||||
lhsp__(1,0) = 0.0d0
|
||||
lhsm__(1,0) = 0.0d0
|
||||
|
||||
lhs__(2,0) = 0.0d0
|
||||
lhsp__(2,0) = 0.0d0
|
||||
lhsm__(2,0) = 0.0d0
|
||||
|
||||
lhs__(3,0) = 1.0d0
|
||||
lhsp__(3,0) = 1.0d0
|
||||
lhsm__(3,0) = 1.0d0
|
||||
|
||||
lhs__(4,0) = 0.0d0
|
||||
lhsp__(4,0) = 0.0d0
|
||||
lhsm__(4,0) = 0.0d0
|
||||
|
||||
lhs__(5,0) = 0.0d0
|
||||
lhsp__(5,0) = 0.0d0
|
||||
lhsm__(5,0) = 0.0d0
|
||||
|
||||
lhs__(1,1) = 0.0d0
|
||||
|
||||
ru1 = c3c4*1.0d0/u(1,i,1-1,k)
|
||||
ru1 = dmax1( dy3 + con43 * ru1,
|
||||
> dy5 + c1c5*ru1,
|
||||
> dymax + ru1,
|
||||
> dy1)
|
||||
lhs__(2,1) = - dtty2 * vs(i,1-1,k) - dtty1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,1,k)
|
||||
ru1 = dmax1( dy3 + con43 * ru1,
|
||||
> dy5 + c1c5*ru1,
|
||||
> dymax + ru1,
|
||||
> dy1)
|
||||
lhs__(3,1) = 1.0d0 + c2dtty1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,1+1,k)
|
||||
ru1 = dmax1( dy3 + con43 * ru1,
|
||||
> dy5 + c1c5*ru1,
|
||||
> dymax + ru1,
|
||||
> dy1)
|
||||
lhs__(4,1) = dtty2 * vs(i,1+1,k) - dtty1 * ru1
|
||||
lhs__(5,1) = 0.0d0
|
||||
|
||||
lhs__(3,1) = lhs__(3,1) + comz5
|
||||
lhs__(4,1) = lhs__(4,1) - comz4
|
||||
lhs__(5,1) = lhs__(5,1) + comz1
|
||||
|
||||
lhsp__(1,1) = lhs__(1,1)
|
||||
lhsp__(2,1) = lhs__(2,1) - dtty2 * speed(i,1-1,k)
|
||||
lhsp__(3,1) = lhs__(3,1)
|
||||
lhsp__(4,1) = lhs__(4,1) + dtty2 * speed(i,1+1,k)
|
||||
lhsp__(5,1) = lhs__(5,1)
|
||||
lhsm__(1,1) = lhs__(1,1)
|
||||
lhsm__(2,1) = lhs__(2,1) + dtty2 * speed(i,1-1,k)
|
||||
lhsm__(3,1) = lhs__(3,1)
|
||||
lhsm__(4,1) = lhs__(4,1) - dtty2 * speed(i,1+1,k)
|
||||
lhsm__(5,1) = lhs__(5,1)
|
||||
|
||||
do j = 0, ny2+1
|
||||
if(j + 2 .lt. ny2 + 1) then
|
||||
m = j + 2
|
||||
lhs__(1,2) = 0.0d0
|
||||
|
||||
ru1 = c3c4*1.0d0/u(1,i,m-1,k)
|
||||
ru1 = dmax1( dy3 + con43 * ru1,
|
||||
> dy5 + c1c5*ru1,
|
||||
> dymax + ru1,
|
||||
> dy1)
|
||||
lhs__(2,2) = - dtty2 * vs(i,m-1,k) - dtty1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,m,k)
|
||||
ru1 = dmax1( dy3 + con43 * ru1,
|
||||
> dy5 + c1c5*ru1,
|
||||
> dymax + ru1,
|
||||
> dy1)
|
||||
lhs__(3,2) = 1.0d0 + c2dtty1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,m+1,k)
|
||||
ru1 = dmax1( dy3 + con43 * ru1,
|
||||
> dy5 + c1c5*ru1,
|
||||
> dymax + ru1,
|
||||
> dy1)
|
||||
lhs__(4,2) = dtty2 * vs(i,m+1,k) - dtty1 * ru1
|
||||
lhs__(5,2) = 0.0d0
|
||||
|
||||
if(m .eq. 1) then
|
||||
lhs__(3,2) = lhs__(3,2) + comz5
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .eq. 2) then
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .ge. 3 .and. m .le. ny2-2) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .eq. ny2-1) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
else if(m .eq. ny2) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz5
|
||||
endif
|
||||
|
||||
lhsp__(1,2) = lhs__(1,2)
|
||||
lhsp__(2,2) = lhs__(2,2) - dtty2 * speed(i,m-1,k)
|
||||
lhsp__(3,2) = lhs__(3,2)
|
||||
lhsp__(4,2) = lhs__(4,2) + dtty2 * speed(i,m+1,k)
|
||||
lhsp__(5,2) = lhs__(5,2)
|
||||
lhsm__(1,2) = lhs__(1,2)
|
||||
lhsm__(2,2) = lhs__(2,2) + dtty2 * speed(i,m-1,k)
|
||||
lhsm__(3,2) = lhs__(3,2)
|
||||
lhsm__(4,2) = lhs__(4,2) - dtty2 * speed(i,m+1,k)
|
||||
lhsm__(5,2) = lhs__(5,2)
|
||||
else if(j + 2 .eq. ny2+1) then
|
||||
|
||||
lhs__(1,2) = 0.0d0
|
||||
lhsp__(1,2) = 0.0d0
|
||||
lhsm__(1,2) = 0.0d0
|
||||
|
||||
lhs__(2,2) = 0.0d0
|
||||
lhsp__(2,2) = 0.0d0
|
||||
lhsm__(2,2) = 0.0d0
|
||||
|
||||
lhs__(3,2) = 1.0d0
|
||||
lhsp__(3,2) = 1.0d0
|
||||
lhsm__(3,2) = 1.0d0
|
||||
|
||||
lhs__(4,2) = 0.0d0
|
||||
lhsp__(4,2) = 0.0d0
|
||||
lhsm__(4,2) = 0.0d0
|
||||
|
||||
lhs__(5,2) = 0.0d0
|
||||
lhsp__(5,2) = 0.0d0
|
||||
lhsm__(5,2) = 0.0d0
|
||||
|
||||
endif
|
||||
!********************************** end of init
|
||||
|
||||
j1 = j + 1
|
||||
j2 = j + 2
|
||||
fac1 = 1.d0/lhs__(3,0)
|
||||
lhs__(4,0) = fac1*lhs__(4,0)
|
||||
lhs__(5,0) = fac1*lhs__(5,0)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
end do
|
||||
|
||||
if(j .le. ny2-1) then
|
||||
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
|
||||
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
|
||||
lhs__(2,2)=lhs__(2,2)-lhs__(1,2)*lhs__(4,0)
|
||||
lhs__(3,2)=lhs__(3,2)-lhs__(1,2)*lhs__(5,0)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j1,k)=rhs(m,i,j1,k)-lhs__(2,1)*rhs(m,i,j,k)
|
||||
rhs(m,i,j2,k)=rhs(m,i,j2,k)-lhs__(1,2)*rhs(m,i,j,k)
|
||||
end do
|
||||
|
||||
else
|
||||
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
|
||||
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
|
||||
fac2 = 1.d0/lhs__(3,1)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j1,k) = rhs(m,i,j1,k)-lhs__(2,1)*rhs(m,i,j,k)
|
||||
rhs(m,i,j1,k) = fac2*rhs(m,i,j1,k)
|
||||
end do
|
||||
endif
|
||||
|
||||
m = 4
|
||||
fac1 = 1.d0/lhsp__(3,0)
|
||||
lhsp__(4,0) = fac1*lhsp__(4,0)
|
||||
lhsp__(5,0) = fac1*lhsp__(5,0)
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
lhsp__(3,1)=lhsp__(3,1)-lhsp__(2,1)*lhsp__(4,0)
|
||||
lhsp__(4,1)=lhsp__(4,1)-lhsp__(2,1)*lhsp__(5,0)
|
||||
rhs(m,i,j1,k) = rhs(m,i,j1,k)-lhsp__(2,1)*rhs(m,i,j,k)
|
||||
if(j .lt. ny2) then
|
||||
lhsp__(2,2)=lhsp__(2,2)-lhsp__(1,2)*lhsp__(4,0)
|
||||
lhsp__(3,2)=lhsp__(3,2)-lhsp__(1,2)*lhsp__(5,0)
|
||||
rhs(m,i,j2,k)=rhs(m,i,j2,k)-lhsp__(1,2)*rhs(m,i,j,k)
|
||||
endif
|
||||
m = 5
|
||||
fac1 = 1.d0/lhsm__(3,0)
|
||||
lhsm__(4,0) = fac1*lhsm__(4,0)
|
||||
lhsm__(5,0) = fac1*lhsm__(5,0)
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
lhsm__(3,1)=lhsm__(3,1)-lhsm__(2,1)*lhsm__(4,0)
|
||||
lhsm__(4,1)=lhsm__(4,1)-lhsm__(2,1)*lhsm__(5,0)
|
||||
rhs(m,i,j1,k)=rhs(m,i,j1,k) -lhsm__(2,1)*rhs(m,i,j,k)
|
||||
if(j .lt. ny2) then
|
||||
lhsm__(2,2)=lhsm__(2,2)-lhsm__(1,2)*lhsm__(4,0)
|
||||
lhsm__(3,2)=lhsm__(3,2)-lhsm__(1,2)*lhsm__(5,0)
|
||||
rhs(m,i,j2,k)=rhs(m,i,j2,k) -lhsm__(1,2)*rhs(m,i,j,k)
|
||||
endif
|
||||
|
||||
if(j .eq. ny2) then
|
||||
rhs(4,i,j1,k) = rhs(4,i,j1,k)/lhsp__(3,1)
|
||||
rhs(5,i,j1,k) = rhs(5,i,j1,k)/lhsm__(3,1)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k) = rhs(m,i,j,k)-lhs__(4,0)*rhs(m,i,j1,k)
|
||||
end do
|
||||
rhs(4,i,j,k) = rhs(4,i,j,k)-lhsp__(4,0)*rhs(4,i,j1,k)
|
||||
rhs(5,i,j,k) = rhs(5,i,j,k)-lhsm__(4,0)*rhs(5,i,j1,k)
|
||||
endif
|
||||
lhs(0,4,i,j,k) = lhs__(4,0)
|
||||
lhs(1,4,i,j,k) = lhsp__(4,0)
|
||||
lhs(2,4,i,j,k) = lhsm__(4,0)
|
||||
|
||||
lhs(0,5,i,j,k) = lhs__(5,0)
|
||||
lhs(1,5,i,j,k) = lhsp__(5,0)
|
||||
lhs(2,5,i,j,k) = lhsm__(5,0)
|
||||
|
||||
lhs__(1,0) = lhs__(1,1)
|
||||
lhsp__(1,0) = lhsp__(1,1)
|
||||
lhsm__(1,0) = lhsm__(1,1)
|
||||
lhs__(1,1) = lhs__(1,2)
|
||||
lhsp__(1,1) = lhsp__(1,2)
|
||||
lhsm__(1,1) = lhsm__(1,2)
|
||||
|
||||
lhs__(2,0) = lhs__(2,1)
|
||||
lhsp__(2,0) = lhsp__(2,1)
|
||||
lhsm__(2,0) = lhsm__(2,1)
|
||||
lhs__(2,1) = lhs__(2,2)
|
||||
lhsp__(2,1) = lhsp__(2,2)
|
||||
lhsm__(2,1) = lhsm__(2,2)
|
||||
|
||||
lhs__(3,0) = lhs__(3,1)
|
||||
lhsp__(3,0) = lhsp__(3,1)
|
||||
lhsm__(3,0) = lhsm__(3,1)
|
||||
lhs__(3,1) = lhs__(3,2)
|
||||
lhsp__(3,1) = lhsp__(3,2)
|
||||
lhsm__(3,1) = lhsm__(3,2)
|
||||
|
||||
lhs__(4,0) = lhs__(4,1)
|
||||
lhsp__(4,0) = lhsp__(4,1)
|
||||
lhsm__(4,0) = lhsm__(4,1)
|
||||
lhs__(4,1) = lhs__(4,2)
|
||||
lhsp__(4,1) = lhsp__(4,2)
|
||||
lhsm__(4,1) = lhsm__(4,2)
|
||||
|
||||
lhs__(5,0) = lhs__(5,1)
|
||||
lhsp__(5,0) = lhsp__(5,1)
|
||||
lhsm__(5,0) = lhsm__(5,1)
|
||||
lhs__(5,1) = lhs__(5,2)
|
||||
lhsp__(5,1) = lhsp__(5,2)
|
||||
lhsm__(5,1) = lhsm__(5,2)
|
||||
enddo
|
||||
|
||||
j = problem_size-3
|
||||
rhs__(1,2) = rhs(1,i,j+2,k)
|
||||
rhs__(2,2) = rhs(2,i,j+2,k)
|
||||
rhs__(3,2) = rhs(3,i,j+2,k)
|
||||
rhs__(4,2) = rhs(4,i,j+2,k)
|
||||
rhs__(5,2) = rhs(5,i,j+2,k)
|
||||
|
||||
rhs__(1,1) = rhs(1,i,j+1,k)
|
||||
rhs__(2,1) = rhs(2,i,j+1,k)
|
||||
rhs__(3,1) = rhs(3,i,j+1,k)
|
||||
rhs__(4,1) = rhs(4,i,j+1,k)
|
||||
rhs__(5,1) = rhs(5,i,j+1,k)
|
||||
|
||||
rhs__(1,0) = rhs(1,i,j,k)
|
||||
rhs__(2,0) = rhs(2,i,j,k)
|
||||
rhs__(3,0) = rhs(3,i,j,k)
|
||||
rhs__(4,0) = rhs(4,i,j,k)
|
||||
rhs__(5,0) = rhs(5,i,j,k)
|
||||
|
||||
rhs__(1,0) = rhs__(1,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(1,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(1,2)
|
||||
rhs__(2,0) = rhs__(2,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(2,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(2,2)
|
||||
rhs__(3,0) = rhs__(3,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(3,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(3,2)
|
||||
|
||||
rhs__(4,0) = rhs__(4,0) -
|
||||
> lhs(1,4,i,j,k)*rhs__(4,1) -
|
||||
> lhs(1,5,i,j,k)*rhs__(4,2)
|
||||
rhs__(5,0) = rhs__(5,0) -
|
||||
> lhs(2,4,i,j,k)*rhs__(5,1) -
|
||||
> lhs(2,5,i,j,k)*rhs__(5,2)
|
||||
|
||||
rhs__(1,2) = rhs__(1,1)
|
||||
rhs__(2,2) = rhs__(2,1)
|
||||
rhs__(3,2) = rhs__(3,1)
|
||||
rhs__(4,2) = rhs__(4,1)
|
||||
rhs__(5,2) = rhs__(5,1)
|
||||
|
||||
rhs__(1,1) = rhs__(1,0)
|
||||
rhs__(2,1) = rhs__(2,0)
|
||||
rhs__(3,1) = rhs__(3,0)
|
||||
rhs__(4,1) = rhs__(4,0)
|
||||
rhs__(5,1) = rhs__(5,0)
|
||||
|
||||
do j = problem_size-4, 0, -1
|
||||
rhs__(1,0) = rhs(1,i,j,k)
|
||||
rhs__(2,0) = rhs(2,i,j,k)
|
||||
rhs__(3,0) = rhs(3,i,j,k)
|
||||
rhs__(4,0) = rhs(4,i,j,k)
|
||||
rhs__(5,0) = rhs(5,i,j,k)
|
||||
|
||||
rhs__(1,0) = rhs__(1,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(1,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(1,2)
|
||||
rhs__(2,0) = rhs__(2,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(2,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(2,2)
|
||||
rhs__(3,0) = rhs__(3,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(3,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(3,2)
|
||||
|
||||
rhs__(4,0) = rhs__(4,0) -
|
||||
> lhs(1,4,i,j,k)*rhs__(4,1) -
|
||||
> lhs(1,5,i,j,k)*rhs__(4,2)
|
||||
rhs__(5,0) = rhs__(5,0) -
|
||||
> lhs(2,4,i,j,k)*rhs__(5,1) -
|
||||
> lhs(2,5,i,j,k)*rhs__(5,2)
|
||||
|
||||
t1 = bt * rhs__(1,2)
|
||||
t2 = 0.5d0 * (rhs__(4,2) + rhs__(5,2))
|
||||
rhs(1,i,j+2,k) = bt * (rhs__(4,2) - rhs__(5,2))
|
||||
rhs(2,i,j+2,k) = -rhs__(3,2)
|
||||
rhs(3,i,j+2,k) = rhs__(2,2)
|
||||
rhs(4,i,j+2,k) = -t1 + t2
|
||||
rhs(5,i,j+2,k) = t1 + t2
|
||||
|
||||
rhs__(1,2) = rhs__(1,1)
|
||||
rhs__(2,2) = rhs__(2,1)
|
||||
rhs__(3,2) = rhs__(3,1)
|
||||
rhs__(4,2) = rhs__(4,1)
|
||||
rhs__(5,2) = rhs__(5,1)
|
||||
|
||||
rhs__(1,1) = rhs__(1,0)
|
||||
rhs__(2,1) = rhs__(2,0)
|
||||
rhs__(3,1) = rhs__(3,0)
|
||||
rhs__(4,1) = rhs__(4,0)
|
||||
rhs__(5,1) = rhs__(5,0)
|
||||
enddo
|
||||
t1 = bt * rhs__(1,2)
|
||||
t2 = 0.5d0 * (rhs__(4,2) + rhs__(5,2))
|
||||
rhs(1,i,j+2,k) = bt * (rhs__(4,2) - rhs__(5,2))
|
||||
rhs(2,i,j+2,k) = -rhs__(3,2)
|
||||
rhs(3,i,j+2,k) = rhs__(2,2)
|
||||
rhs(4,i,j+2,k) = -t1 + t2
|
||||
rhs(5,i,j+2,k) = t1 + t2
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
if (timeron) call timer_stop(t_ysolve)
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,330 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine y_solve
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c this function performs the solution of the approximate factorization
|
||||
c step in the y-direction for all five matrix components
|
||||
c simultaneously. The Thomas algorithm is employed to solve the
|
||||
c systems for the y-lines. Boundary conditions are non-periodic
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'header.h'
|
||||
|
||||
integer i, j, k, j1, j2, m, m1
|
||||
double precision ru1, fac1, fac2, t1,t2,t3
|
||||
double precision lhs__(5,0:2), lhsm__(5,0:2), lhsp__(5,0:2)
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
if (timeron) call timer_start(t_ysolve)
|
||||
|
||||
!DVM$ region local(lhs)
|
||||
!DVM$ parallel (k,j,i) on rhs(*,i,j,k)
|
||||
!DVM$& ,private(m,j1,j2,fac1,fac2,ru1,lhs__,lhsp__,lhsm__)
|
||||
!DVM$& ,ACROSS(OUT:rhs(0:0,0:0,0:2,0:0), lhs(0:0,0:0,0:0,0:2,0:0))
|
||||
!DVM$& ,stage(stage_n)
|
||||
do k = 1, nz2
|
||||
do j = 0, problem_size-1
|
||||
do i = 1, nx2
|
||||
if (j .eq. 0) then
|
||||
lhs__(1,0) = 0.0d0
|
||||
lhsp__(1,0) = 0.0d0
|
||||
lhsm__(1,0) = 0.0d0
|
||||
|
||||
lhs__(2,0) = 0.0d0
|
||||
lhsp__(2,0) = 0.0d0
|
||||
lhsm__(2,0) = 0.0d0
|
||||
|
||||
lhs__(3,0) = 1.0d0
|
||||
lhsp__(3,0) = 1.0d0
|
||||
lhsm__(3,0) = 1.0d0
|
||||
|
||||
lhs__(4,0) = 0.0d0
|
||||
lhsp__(4,0) = 0.0d0
|
||||
lhsm__(4,0) = 0.0d0
|
||||
|
||||
lhs__(5,0) = 0.0d0
|
||||
lhsp__(5,0) = 0.0d0
|
||||
lhsm__(5,0) = 0.0d0
|
||||
|
||||
lhs__(1,1) = 0.0d0
|
||||
|
||||
ru1 = c3c4*1.0d0/u(1,i,j,k)
|
||||
ru1 = dmax1( dy3 + con43 * ru1,
|
||||
> dy5 + c1c5*ru1,
|
||||
> dymax + ru1,
|
||||
> dy1)
|
||||
lhs__(2,1) = - dtty2 *
|
||||
> u(3,i,0,k) * (1.0d0/u(1,i,j,k))- dtty1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,j+1,k)
|
||||
ru1 = dmax1( dy3 + con43 * ru1,
|
||||
> dy5 + c1c5*ru1,
|
||||
> dymax + ru1,
|
||||
> dy1)
|
||||
lhs__(3,1) = 1.0d0 + c2dtty1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,j+2,k)
|
||||
ru1 = dmax1( dy3 + con43 * ru1,
|
||||
> dy5 + c1c5*ru1,
|
||||
> dymax + ru1,
|
||||
> dy1)
|
||||
lhs__(4,1) = dtty2 *
|
||||
> u(3,i,2,k) * (1.0d0/u(1,i,j+2,k)) - dtty1 * ru1
|
||||
lhs__(5,1) = 0.0d0
|
||||
|
||||
lhs__(3,1) = lhs__(3,1) + comz5
|
||||
lhs__(4,1) = lhs__(4,1) - comz4
|
||||
lhs__(5,1) = lhs__(5,1) + comz1
|
||||
|
||||
lhsp__(1,1) = lhs__(1,1)
|
||||
lhsp__(2,1) = lhs__(2,1) - dtty2 * speed(i,j,k)
|
||||
lhsp__(3,1) = lhs__(3,1)
|
||||
lhsp__(4,1) = lhs__(4,1) + dtty2 * speed(i,j+2,k)
|
||||
lhsp__(5,1) = lhs__(5,1)
|
||||
lhsm__(1,1) = lhs__(1,1)
|
||||
lhsm__(2,1) = lhs__(2,1) + dtty2 * speed(i,j,k)
|
||||
lhsm__(3,1) = lhs__(3,1)
|
||||
lhsm__(4,1) = lhs__(4,1) - dtty2 * speed(i,j+2,k)
|
||||
lhsm__(5,1) = lhs__(5,1)
|
||||
else
|
||||
do m = 1, 5
|
||||
lhs__(m,0) = lhs(0,m,i,j,k)
|
||||
lhsp__(m,0) = lhs(1,m,i,j,k)
|
||||
lhsm__(m,0) = lhs(2,m,i,j,k)
|
||||
|
||||
lhs__(m,1) = lhs(0,m,i,j+1,k)
|
||||
lhsp__(m,1) = lhs(1,m,i,j+1,k)
|
||||
lhsm__(m,1) = lhs(2,m,i,j+1,k)
|
||||
enddo
|
||||
endif
|
||||
|
||||
if(j + 2 .lt. ny2 + 1) then
|
||||
m = j + 2
|
||||
lhs__(1,2) = 0.0d0
|
||||
|
||||
ru1 = c3c4*1.0d0/u(1,i,m-1,k)
|
||||
ru1 = dmax1( dy3 + con43 * ru1,
|
||||
> dy5 + c1c5*ru1,
|
||||
> dymax + ru1,
|
||||
> dy1)
|
||||
lhs__(2,2) = - dtty2 *
|
||||
> u(3,i,m-1,k) * (1.0d0/u(1,i,m-1,k)) - dtty1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,m,k)
|
||||
ru1 = dmax1( dy3 + con43 * ru1,
|
||||
> dy5 + c1c5*ru1,
|
||||
> dymax + ru1,
|
||||
> dy1)
|
||||
lhs__(3,2) = 1.0d0 + c2dtty1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,m+1,k)
|
||||
ru1 = dmax1( dy3 + con43 * ru1,
|
||||
> dy5 + c1c5*ru1,
|
||||
> dymax + ru1,
|
||||
> dy1)
|
||||
lhs__(4,2) = dtty2 *
|
||||
> u(3,i,m+1,k) * (1.0d0/u(1,i,m+1,k)) - dtty1 * ru1
|
||||
lhs__(5,2) = 0.0d0
|
||||
|
||||
if(m .eq. 1) then
|
||||
lhs__(3,2) = lhs__(3,2) + comz5
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .eq. 2) then
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .ge. 3 .and. m .le. ny2-2) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .eq. ny2-1) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
else if(m .eq. ny2) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz5
|
||||
endif
|
||||
|
||||
lhsp__(1,2) = lhs__(1,2)
|
||||
lhsp__(2,2) = lhs__(2,2) - dtty2 * speed(i,m-1,k)
|
||||
lhsp__(3,2) = lhs__(3,2)
|
||||
lhsp__(4,2) = lhs__(4,2) + dtty2 * speed(i,m+1,k)
|
||||
lhsp__(5,2) = lhs__(5,2)
|
||||
lhsm__(1,2) = lhs__(1,2)
|
||||
lhsm__(2,2) = lhs__(2,2) + dtty2 * speed(i,m-1,k)
|
||||
lhsm__(3,2) = lhs__(3,2)
|
||||
lhsm__(4,2) = lhs__(4,2) - dtty2 * speed(i,m+1,k)
|
||||
lhsm__(5,2) = lhs__(5,2)
|
||||
else if(j + 2 .eq. ny2+1) then
|
||||
|
||||
lhs__(1,2) = 0.0d0
|
||||
lhsp__(1,2) = 0.0d0
|
||||
lhsm__(1,2) = 0.0d0
|
||||
|
||||
lhs__(2,2) = 0.0d0
|
||||
lhsp__(2,2) = 0.0d0
|
||||
lhsm__(2,2) = 0.0d0
|
||||
|
||||
lhs__(3,2) = 1.0d0
|
||||
lhsp__(3,2) = 1.0d0
|
||||
lhsm__(3,2) = 1.0d0
|
||||
|
||||
lhs__(4,2) = 0.0d0
|
||||
lhsp__(4,2) = 0.0d0
|
||||
lhsm__(4,2) = 0.0d0
|
||||
|
||||
lhs__(5,2) = 0.0d0
|
||||
lhsp__(5,2) = 0.0d0
|
||||
lhsm__(5,2) = 0.0d0
|
||||
|
||||
endif
|
||||
!********************************** end of init
|
||||
|
||||
j1 = j + 1
|
||||
j2 = j + 2
|
||||
fac1 = 1.d0/lhs__(3,0)
|
||||
lhs__(4,0) = fac1*lhs__(4,0)
|
||||
lhs__(5,0) = fac1*lhs__(5,0)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
end do
|
||||
|
||||
if(j .le. ny2-1) then
|
||||
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
|
||||
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
|
||||
lhs__(2,2)=lhs__(2,2)-lhs__(1,2)*lhs__(4,0)
|
||||
lhs__(3,2)=lhs__(3,2)-lhs__(1,2)*lhs__(5,0)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j1,k)=rhs(m,i,j1,k)-lhs__(2,1)*rhs(m,i,j,k)
|
||||
rhs(m,i,j2,k)=rhs(m,i,j2,k)-lhs__(1,2)*rhs(m,i,j,k)
|
||||
end do
|
||||
|
||||
else
|
||||
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
|
||||
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
|
||||
if (lhs__(3,1) .ne. 0) then
|
||||
fac2 = 1.d0/lhs__(3,1)
|
||||
else
|
||||
fac2 = 0
|
||||
endif
|
||||
do m = 1, 3
|
||||
rhs(m,i,j1,k) = rhs(m,i,j1,k)-lhs__(2,1)*rhs(m,i,j,k)
|
||||
rhs(m,i,j1,k) = fac2*rhs(m,i,j1,k)
|
||||
end do
|
||||
endif
|
||||
|
||||
m = 4
|
||||
fac1 = 1.d0/lhsp__(3,0)
|
||||
lhsp__(4,0) = fac1*lhsp__(4,0)
|
||||
lhsp__(5,0) = fac1*lhsp__(5,0)
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
lhsp__(3,1)=lhsp__(3,1)-lhsp__(2,1)*lhsp__(4,0)
|
||||
lhsp__(4,1)=lhsp__(4,1)-lhsp__(2,1)*lhsp__(5,0)
|
||||
rhs(m,i,j1,k) = rhs(m,i,j1,k)-lhsp__(2,1)*rhs(m,i,j,k)
|
||||
if(j .lt. ny2) then
|
||||
lhsp__(2,2)=lhsp__(2,2)-lhsp__(1,2)*lhsp__(4,0)
|
||||
lhsp__(3,2)=lhsp__(3,2)-lhsp__(1,2)*lhsp__(5,0)
|
||||
rhs(m,i,j2,k)=rhs(m,i,j2,k)-lhsp__(1,2)*rhs(m,i,j,k)
|
||||
endif
|
||||
m = 5
|
||||
fac1 = 1.d0/lhsm__(3,0)
|
||||
lhsm__(4,0) = fac1*lhsm__(4,0)
|
||||
lhsm__(5,0) = fac1*lhsm__(5,0)
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
lhsm__(3,1)=lhsm__(3,1)-lhsm__(2,1)*lhsm__(4,0)
|
||||
lhsm__(4,1)=lhsm__(4,1)-lhsm__(2,1)*lhsm__(5,0)
|
||||
rhs(m,i,j1,k)=rhs(m,i,j1,k) -lhsm__(2,1)*rhs(m,i,j,k)
|
||||
if(j .lt. ny2) then
|
||||
lhsm__(2,2)=lhsm__(2,2)-lhsm__(1,2)*lhsm__(4,0)
|
||||
lhsm__(3,2)=lhsm__(3,2)-lhsm__(1,2)*lhsm__(5,0)
|
||||
rhs(m,i,j2,k)=rhs(m,i,j2,k) -lhsm__(1,2)*rhs(m,i,j,k)
|
||||
endif
|
||||
|
||||
if(j .eq. ny2) then
|
||||
rhs(4,i,j1,k) = rhs(4,i,j1,k)/lhsp__(3,1)
|
||||
rhs(5,i,j1,k) = rhs(5,i,j1,k)/lhsm__(3,1)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k) = rhs(m,i,j,k)-lhs__(4,0)*rhs(m,i,j1,k)
|
||||
end do
|
||||
rhs(4,i,j,k) = rhs(4,i,j,k)-lhsp__(4,0)*rhs(4,i,j1,k)
|
||||
rhs(5,i,j,k) = rhs(5,i,j,k)-lhsm__(4,0)*rhs(5,i,j1,k)
|
||||
endif
|
||||
|
||||
do m = 1,5
|
||||
lhs(0,m,i,j,k) = lhs__(m,0)
|
||||
lhs(1,m,i,j,k) = lhsp__(m,0)
|
||||
lhs(2,m,i,j,k) = lhsm__(m,0)
|
||||
|
||||
lhs(0,m,i,j+1,k) = lhs__(m,1)
|
||||
lhs(1,m,i,j+1,k) = lhsp__(m,1)
|
||||
lhs(2,m,i,j+1,k) = lhsm__(m,1)
|
||||
if (j .lt. ny2) then
|
||||
lhs(0,m,i,j+2,k) = lhs__(m,2)
|
||||
lhs(1,m,i,j+2,k) = lhsp__(m,2)
|
||||
lhs(2,m,i,j+2,k) = lhsm__(m,2)
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ parallel (k,j,i) on rhs(*,i,j,k),ACROSS(rhs(0:0,0:0,0:2,0:0))
|
||||
!DVM$& ,stage(stage_n)
|
||||
do k = 1, nz2
|
||||
do j = problem_size-3, 0, -1
|
||||
do i = 1, nx2
|
||||
rhs(1,i,j,k) = rhs(1,i,j,k) -
|
||||
& lhs(0,4,i,j,k)*rhs(1,i,j+1,k) -
|
||||
& lhs(0,5,i,j,k)*rhs(1,i,j+2,k)
|
||||
rhs(2,i,j,k) = rhs(2,i,j,k) -
|
||||
& lhs(0,4,i,j,k)*rhs(2,i,j+1,k) -
|
||||
& lhs(0,5,i,j,k)*rhs(2,i,j+2,k)
|
||||
rhs(3,i,j,k) = rhs(3,i,j,k) -
|
||||
& lhs(0,4,i,j,k)*rhs(3,i,j+1,k) -
|
||||
& lhs(0,5,i,j,k)*rhs(3,i,j+2,k)
|
||||
|
||||
rhs(4,i,j,k) = rhs(4,i,j,k) -
|
||||
& lhs(1,4,i,j,k)*rhs(4,i,j+1,k) -
|
||||
& lhs(1,5,i,j,k)*rhs(4,i,j+2,k)
|
||||
rhs(5,i,j,k) = rhs(5,i,j,k) -
|
||||
& lhs(2,4,i,j,k)*rhs(5,i,j+1,k) -
|
||||
& lhs(2,5,i,j,k)*rhs(5,i,j+2,k)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ parallel (k,j,i) on rhs(*,i,j,k),PRIVATE(t1,t2,t3)
|
||||
do k = 1, nz2
|
||||
do j = 1, ny2
|
||||
do i = 1, nx2
|
||||
t1 = bt * rhs(1,i,j,k)
|
||||
t2 = 0.5d0 * (rhs(4,i,j,k) + rhs(5,i,j,k))
|
||||
t3 = rhs(2,i,j,k)
|
||||
|
||||
rhs(1,i,j,k) = bt * (rhs(4,i,j,k) - rhs(5,i,j,k))
|
||||
rhs(2,i,j,k) = -rhs(3,i,j,k)
|
||||
rhs(3,i,j,k) = t3
|
||||
rhs(4,i,j,k) = -t1 + t2
|
||||
rhs(5,i,j,k) = t1 + t2
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
if (timeron) call timer_stop(t_ysolve)
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,433 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine z_solve
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c this function performs the solution of the approximate factorization
|
||||
c step in the z-direction for all five matrix components
|
||||
c simultaneously. The Thomas algorithm is employed to solve the
|
||||
c systems for the z-lines. Boundary conditions are non-periodic
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'header.h'
|
||||
|
||||
integer i, j, k, k1, k2, m, m1
|
||||
double precision ru1, fac1, fac2, rhs__(5,0:2)
|
||||
double precision lhs__(5,0:2), lhsm__(5,0:2), lhsp__(5,0:2)
|
||||
double precision t1,t2,t3,ac,xvel,yvel,zvel,btuz,ac2u,uzik1
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Prepare for z-solve, array redistribution
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
if (timeron) call timer_start(t_zsolve)
|
||||
|
||||
!DVM$ region local(lhs)
|
||||
!DVM$ parallel (j,i) on u(*,i,j,*)
|
||||
!DVM$& , CUDA_BLOCK(32,4)
|
||||
!DVM$& ,private(m,k1,k2,ru1,fac1,fac2,k,lhs__,lhsp__,lhsm__,rhs__,
|
||||
!DVM$& t1,t2,t3,ac,xvel,yvel,zvel,btuz,ac2u,uzik1)
|
||||
do j = 1, ny2
|
||||
do i = 1, nx2
|
||||
|
||||
lhs__(1,0) = 0.0d0
|
||||
lhsp__(1,0) = 0.0d0
|
||||
lhsm__(1,0) = 0.0d0
|
||||
|
||||
lhs__(2,0) = 0.0d0
|
||||
lhsp__(2,0) = 0.0d0
|
||||
lhsm__(2,0) = 0.0d0
|
||||
|
||||
lhs__(3,0) = 1.0d0
|
||||
lhsp__(3,0) = 1.0d0
|
||||
lhsm__(3,0) = 1.0d0
|
||||
|
||||
lhs__(4,0) = 0.0d0
|
||||
lhsp__(4,0) = 0.0d0
|
||||
lhsm__(4,0) = 0.0d0
|
||||
|
||||
lhs__(5,0) = 0.0d0
|
||||
lhsp__(5,0) = 0.0d0
|
||||
lhsm__(5,0) = 0.0d0
|
||||
|
||||
|
||||
lhs__(1,1) = 0.0d0
|
||||
ru1 = c3c4*1.0d0/u(1,i,j,0)
|
||||
ru1 = dmax1(dz4 + con43 * ru1,
|
||||
> dz5 + c1c5 * ru1,
|
||||
> dzmax + ru1,
|
||||
> dz1)
|
||||
lhs__(2,1) = - dttz2 * ws(i,j,0) - dttz1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,j,1)
|
||||
ru1 = dmax1(dz4 + con43 * ru1,
|
||||
> dz5 + c1c5 * ru1,
|
||||
> dzmax + ru1,
|
||||
> dz1)
|
||||
lhs__(3,1) = 1.0d0 + c2dttz1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,j,2)
|
||||
ru1 = dmax1(dz4 + con43 * ru1,
|
||||
> dz5 + c1c5 * ru1,
|
||||
> dzmax + ru1,
|
||||
> dz1)
|
||||
lhs__(4,1) = dttz2 * ws(i,j,2) - dttz1 * ru1
|
||||
lhs__(5,1) = 0.0d0
|
||||
|
||||
lhs__(3,1) = lhs__(3,1) + comz5
|
||||
lhs__(4,1) = lhs__(4,1) - comz4
|
||||
lhs__(5,1) = lhs__(5,1) + comz1
|
||||
|
||||
lhsp__(1,1) = lhs__(1,1)
|
||||
lhsp__(2,1) = lhs__(2,1) - dttz2 * speed(i,j,1-1)
|
||||
lhsp__(3,1) = lhs__(3,1)
|
||||
lhsp__(4,1) = lhs__(4,1) + dttz2 * speed(i,j,1+1)
|
||||
lhsp__(5,1) = lhs__(5,1)
|
||||
lhsm__(1,1) = lhs__(1,1)
|
||||
lhsm__(2,1) = lhs__(2,1) + dttz2 * speed(i,j,1-1)
|
||||
lhsm__(3,1) = lhs__(3,1)
|
||||
lhsm__(4,1) = lhs__(4,1) - dttz2 * speed(i,j,1+1)
|
||||
lhsm__(5,1) = lhs__(5,1)
|
||||
|
||||
do k = 0, nz2+1
|
||||
if(k + 2 .lt. nz2 + 1) then
|
||||
m = k + 2
|
||||
lhs__(1,2) = 0.0d0
|
||||
|
||||
ru1 = c3c4*1.0d0/u(1,i,j,m-1)
|
||||
ru1 = dmax1(dz4 + con43 * ru1,
|
||||
> dz5 + c1c5 * ru1,
|
||||
> dzmax + ru1,
|
||||
> dz1)
|
||||
lhs__(2,2) = - dttz2 * ws(i,j,m-1) - dttz1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,j,m)
|
||||
ru1 = dmax1(dz4 + con43 * ru1,
|
||||
> dz5 + c1c5 * ru1,
|
||||
> dzmax + ru1,
|
||||
> dz1)
|
||||
lhs__(3,2) = 1.0d0 + c2dttz1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,j,m+1)
|
||||
ru1 = dmax1(dz4 + con43 * ru1,
|
||||
> dz5 + c1c5 * ru1,
|
||||
> dzmax + ru1,
|
||||
> dz1)
|
||||
lhs__(4,2) = dttz2 * ws(i,j,m+1) - dttz1 * ru1
|
||||
lhs__(5,2) = 0.0d0
|
||||
|
||||
if(m .eq. 1) then
|
||||
lhs__(3,2) = lhs__(3,2) + comz5
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .eq. 2) then
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .ge. 3 .and. m .le. nz2-2) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .eq. nz2-1) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
else if(m .eq. nz2) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz5
|
||||
endif
|
||||
|
||||
lhsp__(1,2) = lhs__(1,2)
|
||||
lhsp__(2,2) = lhs__(2,2) - dttz2 * speed(i,j,m-1)
|
||||
lhsp__(3,2) = lhs__(3,2)
|
||||
lhsp__(4,2) = lhs__(4,2) + dttz2 * speed(i,j,m+1)
|
||||
lhsp__(5,2) = lhs__(5,2)
|
||||
lhsm__(1,2) = lhs__(1,2)
|
||||
lhsm__(2,2) = lhs__(2,2) + dttz2 * speed(i,j,m-1)
|
||||
lhsm__(3,2) = lhs__(3,2)
|
||||
lhsm__(4,2) = lhs__(4,2) - dttz2 * speed(i,j,m+1)
|
||||
lhsm__(5,2) = lhs__(5,2)
|
||||
else if(k + 2 .eq. nz2+1) then
|
||||
|
||||
lhs__(1,2) = 0.0d0
|
||||
lhsp__(1,2) = 0.0d0
|
||||
lhsm__(1,2) = 0.0d0
|
||||
|
||||
lhs__(2,2) = 0.0d0
|
||||
lhsp__(2,2) = 0.0d0
|
||||
lhsm__(2,2) = 0.0d0
|
||||
|
||||
lhs__(3,2) = 1.0d0
|
||||
lhsp__(3,2) = 1.0d0
|
||||
lhsm__(3,2) = 1.0d0
|
||||
|
||||
lhs__(4,2) = 0.0d0
|
||||
lhsp__(4,2) = 0.0d0
|
||||
lhsm__(4,2) = 0.0d0
|
||||
|
||||
lhs__(5,2) = 0.0d0
|
||||
lhsp__(5,2) = 0.0d0
|
||||
lhsm__(5,2) = 0.0d0
|
||||
|
||||
endif
|
||||
!********************************** end of init
|
||||
|
||||
k1 = k + 1
|
||||
k2 = k + 2
|
||||
fac1 = 1.d0/lhs__(3,0)
|
||||
lhs__(4,0) = fac1*lhs__(4,0)
|
||||
lhs__(5,0) = fac1*lhs__(5,0)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
end do
|
||||
|
||||
if(k .le. nz2-1) then
|
||||
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
|
||||
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
|
||||
lhs__(2,2)=lhs__(2,2)-lhs__(1,2)*lhs__(4,0)
|
||||
lhs__(3,2)=lhs__(3,2)-lhs__(1,2)*lhs__(5,0)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k1)=rhs(m,i,j,k1)-lhs__(2,1)*rhs(m,i,j,k)
|
||||
rhs(m,i,j,k2)=rhs(m,i,j,k2)-lhs__(1,2)*rhs(m,i,j,k)
|
||||
end do
|
||||
|
||||
else
|
||||
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
|
||||
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
|
||||
fac2 = 1.d0/lhs__(3,1)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k1) = rhs(m,i,j,k1)-lhs__(2,1)*rhs(m,i,j,k)
|
||||
rhs(m,i,j,k1) = fac2*rhs(m,i,j,k1)
|
||||
end do
|
||||
endif
|
||||
|
||||
m = 4
|
||||
fac1 = 1.d0/lhsp__(3,0)
|
||||
lhsp__(4,0) = fac1*lhsp__(4,0)
|
||||
lhsp__(5,0) = fac1*lhsp__(5,0)
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
lhsp__(3,1)=lhsp__(3,1)-lhsp__(2,1)*lhsp__(4,0)
|
||||
lhsp__(4,1)=lhsp__(4,1)-lhsp__(2,1)*lhsp__(5,0)
|
||||
rhs(m,i,j,k1) = rhs(m,i,j,k1)-lhsp__(2,1)*rhs(m,i,j,k)
|
||||
if(k .lt. nz2) then
|
||||
lhsp__(2,2)=lhsp__(2,2)-lhsp__(1,2)*lhsp__(4,0)
|
||||
lhsp__(3,2)=lhsp__(3,2)-lhsp__(1,2)*lhsp__(5,0)
|
||||
rhs(m,i,j,k2)=rhs(m,i,j,k2)-lhsp__(1,2)*rhs(m,i,j,k)
|
||||
endif
|
||||
m = 5
|
||||
fac1 = 1.d0/lhsm__(3,0)
|
||||
lhsm__(4,0) = fac1*lhsm__(4,0)
|
||||
lhsm__(5,0) = fac1*lhsm__(5,0)
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
lhsm__(3,1)=lhsm__(3,1)-lhsm__(2,1)*lhsm__(4,0)
|
||||
lhsm__(4,1)=lhsm__(4,1)-lhsm__(2,1)*lhsm__(5,0)
|
||||
rhs(m,i,j,k1)=rhs(m,i,j,k1) -lhsm__(2,1)*rhs(m,i,j,k)
|
||||
if(k .lt. nz2) then
|
||||
lhsm__(2,2)=lhsm__(2,2)-lhsm__(1,2)*lhsm__(4,0)
|
||||
lhsm__(3,2)=lhsm__(3,2)-lhsm__(1,2)*lhsm__(5,0)
|
||||
rhs(m,i,j,k2)=rhs(m,i,j,k2) -lhsm__(1,2)*rhs(m,i,j,k)
|
||||
endif
|
||||
|
||||
if(k .eq. nz2) then
|
||||
rhs(4,i,j,k1) = rhs(4,i,j,k1)/lhsp__(3,1)
|
||||
rhs(5,i,j,k1) = rhs(5,i,j,k1)/lhsm__(3,1)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k) = rhs(m,i,j,k)-lhs__(4,0)*rhs(m,i,j,k1)
|
||||
end do
|
||||
rhs(4,i,j,k) = rhs(4,i,j,k)-lhsp__(4,0)*rhs(4,i,j,k1)
|
||||
rhs(5,i,j,k) = rhs(5,i,j,k)-lhsm__(4,0)*rhs(5,i,j,k1)
|
||||
endif
|
||||
lhs(0,4,i,j,k) = lhs__(4,0)
|
||||
lhs(1,4,i,j,k) = lhsp__(4,0)
|
||||
lhs(2,4,i,j,k) = lhsm__(4,0)
|
||||
|
||||
lhs(0,5,i,j,k) = lhs__(5,0)
|
||||
lhs(1,5,i,j,k) = lhsp__(5,0)
|
||||
lhs(2,5,i,j,k) = lhsm__(5,0)
|
||||
|
||||
lhs__(1,0) = lhs__(1,1)
|
||||
lhsp__(1,0) = lhsp__(1,1)
|
||||
lhsm__(1,0) = lhsm__(1,1)
|
||||
lhs__(1,1) = lhs__(1,2)
|
||||
lhsp__(1,1) = lhsp__(1,2)
|
||||
lhsm__(1,1) = lhsm__(1,2)
|
||||
|
||||
lhs__(2,0) = lhs__(2,1)
|
||||
lhsp__(2,0) = lhsp__(2,1)
|
||||
lhsm__(2,0) = lhsm__(2,1)
|
||||
lhs__(2,1) = lhs__(2,2)
|
||||
lhsp__(2,1) = lhsp__(2,2)
|
||||
lhsm__(2,1) = lhsm__(2,2)
|
||||
|
||||
lhs__(3,0) = lhs__(3,1)
|
||||
lhsp__(3,0) = lhsp__(3,1)
|
||||
lhsm__(3,0) = lhsm__(3,1)
|
||||
lhs__(3,1) = lhs__(3,2)
|
||||
lhsp__(3,1) = lhsp__(3,2)
|
||||
lhsm__(3,1) = lhsm__(3,2)
|
||||
|
||||
lhs__(4,0) = lhs__(4,1)
|
||||
lhsp__(4,0) = lhsp__(4,1)
|
||||
lhsm__(4,0) = lhsm__(4,1)
|
||||
lhs__(4,1) = lhs__(4,2)
|
||||
lhsp__(4,1) = lhsp__(4,2)
|
||||
lhsm__(4,1) = lhsm__(4,2)
|
||||
|
||||
lhs__(5,0) = lhs__(5,1)
|
||||
lhsp__(5,0) = lhsp__(5,1)
|
||||
lhsm__(5,0) = lhsm__(5,1)
|
||||
lhs__(5,1) = lhs__(5,2)
|
||||
lhsp__(5,1) = lhsp__(5,2)
|
||||
lhsm__(5,1) = lhsm__(5,2)
|
||||
enddo
|
||||
|
||||
|
||||
k = problem_size-3
|
||||
rhs__(1,2) = rhs(1,i,j,k+2)
|
||||
rhs__(2,2) = rhs(2,i,j,k+2)
|
||||
rhs__(3,2) = rhs(3,i,j,k+2)
|
||||
rhs__(4,2) = rhs(4,i,j,k+2)
|
||||
rhs__(5,2) = rhs(5,i,j,k+2)
|
||||
|
||||
rhs__(1,1) = rhs(1,i,j,k+1)
|
||||
rhs__(2,1) = rhs(2,i,j,k+1)
|
||||
rhs__(3,1) = rhs(3,i,j,k+1)
|
||||
rhs__(4,1) = rhs(4,i,j,k+1)
|
||||
rhs__(5,1) = rhs(5,i,j,k+1)
|
||||
|
||||
rhs__(1,0) = rhs(1,i,j,k)
|
||||
rhs__(2,0) = rhs(2,i,j,k)
|
||||
rhs__(3,0) = rhs(3,i,j,k)
|
||||
rhs__(4,0) = rhs(4,i,j,k)
|
||||
rhs__(5,0) = rhs(5,i,j,k)
|
||||
|
||||
rhs__(1,0) = rhs__(1,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(1,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(1,2)
|
||||
rhs__(2,0) = rhs__(2,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(2,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(2,2)
|
||||
rhs__(3,0) = rhs__(3,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(3,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(3,2)
|
||||
|
||||
rhs__(4,0) = rhs__(4,0) -
|
||||
> lhs(1,4,i,j,k)*rhs__(4,1) -
|
||||
> lhs(1,5,i,j,k)*rhs__(4,2)
|
||||
rhs__(5,0) = rhs__(5,0) -
|
||||
> lhs(2,4,i,j,k)*rhs__(5,1) -
|
||||
> lhs(2,5,i,j,k)*rhs__(5,2)
|
||||
|
||||
rhs__(1,2) = rhs__(1,1)
|
||||
rhs__(2,2) = rhs__(2,1)
|
||||
rhs__(3,2) = rhs__(3,1)
|
||||
rhs__(4,2) = rhs__(4,1)
|
||||
rhs__(5,2) = rhs__(5,1)
|
||||
|
||||
rhs__(1,1) = rhs__(1,0)
|
||||
rhs__(2,1) = rhs__(2,0)
|
||||
rhs__(3,1) = rhs__(3,0)
|
||||
rhs__(4,1) = rhs__(4,0)
|
||||
rhs__(5,1) = rhs__(5,0)
|
||||
|
||||
do k = problem_size-4, 0, -1
|
||||
rhs__(1,0) = rhs(1,i,j,k)
|
||||
rhs__(2,0) = rhs(2,i,j,k)
|
||||
rhs__(3,0) = rhs(3,i,j,k)
|
||||
rhs__(4,0) = rhs(4,i,j,k)
|
||||
rhs__(5,0) = rhs(5,i,j,k)
|
||||
|
||||
rhs__(1,0) = rhs__(1,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(1,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(1,2)
|
||||
rhs__(2,0) = rhs__(2,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(2,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(2,2)
|
||||
rhs__(3,0) = rhs__(3,0) -
|
||||
> lhs(0,4,i,j,k)*rhs__(3,1) -
|
||||
> lhs(0,5,i,j,k)*rhs__(3,2)
|
||||
|
||||
rhs__(4,0) = rhs__(4,0) -
|
||||
> lhs(1,4,i,j,k)*rhs__(4,1) -
|
||||
> lhs(1,5,i,j,k)*rhs__(4,2)
|
||||
rhs__(5,0) = rhs__(5,0) -
|
||||
> lhs(2,4,i,j,k)*rhs__(5,1) -
|
||||
> lhs(2,5,i,j,k)*rhs__(5,2)
|
||||
|
||||
xvel = us(i,j,k+2)
|
||||
yvel = vs(i,j,k+2)
|
||||
zvel = ws(i,j,k+2)
|
||||
ac = speed(i,j,k+2)
|
||||
ac2u = ac*ac
|
||||
uzik1 = u(1,i,j,k+2)
|
||||
btuz = bt * uzik1
|
||||
t1 = btuz/ac * (rhs__(4,2) + rhs__(5,2))
|
||||
t2 = rhs__(3,2) + t1
|
||||
t3 = btuz * (rhs__(4,2) - rhs__(5,2))
|
||||
|
||||
rhs__(3,2) = uzik1*rhs__(1,2) + yvel*t2
|
||||
rhs__(4,2) = zvel*t2 + t3
|
||||
rhs__(5,2) = uzik1*(-xvel*rhs__(2,2) +
|
||||
> yvel*rhs__(1,2)) + qs(i,j,k+2)*t2 +
|
||||
> c2iv*ac2u*t1 + zvel*t3
|
||||
rhs__(1,2) = t2
|
||||
rhs__(2,2) = -uzik1*rhs__(2,2) + xvel*t2
|
||||
|
||||
u(1,i,j,k+2) = u(1,i,j,k+2) + rhs__(1,2)
|
||||
u(2,i,j,k+2) = u(2,i,j,k+2) + rhs__(2,2)
|
||||
u(3,i,j,k+2) = u(3,i,j,k+2) + rhs__(3,2)
|
||||
u(4,i,j,k+2) = u(4,i,j,k+2) + rhs__(4,2)
|
||||
u(5,i,j,k+2) = u(5,i,j,k+2) + rhs__(5,2)
|
||||
|
||||
rhs__(1,2) = rhs__(1,1)
|
||||
rhs__(2,2) = rhs__(2,1)
|
||||
rhs__(3,2) = rhs__(3,1)
|
||||
rhs__(4,2) = rhs__(4,1)
|
||||
rhs__(5,2) = rhs__(5,1)
|
||||
|
||||
rhs__(1,1) = rhs__(1,0)
|
||||
rhs__(2,1) = rhs__(2,0)
|
||||
rhs__(3,1) = rhs__(3,0)
|
||||
rhs__(4,1) = rhs__(4,0)
|
||||
rhs__(5,1) = rhs__(5,0)
|
||||
end do
|
||||
xvel = us(i,j,k+2)
|
||||
yvel = vs(i,j,k+2)
|
||||
zvel = ws(i,j,k+2)
|
||||
ac = speed(i,j,k+2)
|
||||
ac2u = ac*ac
|
||||
uzik1 = u(1,i,j,k+2)
|
||||
btuz = bt * uzik1
|
||||
t1 = btuz/ac * (rhs__(4,2) + rhs__(5,2))
|
||||
t2 = rhs__(3,2) + t1
|
||||
t3 = btuz * (rhs__(4,2) - rhs__(5,2))
|
||||
|
||||
rhs__(3,2) = uzik1*rhs__(1,2) + yvel*t2
|
||||
rhs__(4,2) = zvel*t2 + t3
|
||||
rhs__(5,2) = uzik1*(-xvel*rhs__(2,2) +
|
||||
> yvel*rhs__(1,2)) + qs(i,j,k+2)*t2 +
|
||||
> c2iv*ac2u*t1 + zvel*t3
|
||||
rhs__(1,2) = t2
|
||||
rhs__(2,2) = -uzik1*rhs__(2,2) + xvel*t2
|
||||
|
||||
u(1,i,j,k+2) = u(1,i,j,k+2) + rhs__(1,2)
|
||||
u(2,i,j,k+2) = u(2,i,j,k+2) + rhs__(2,2)
|
||||
u(3,i,j,k+2) = u(3,i,j,k+2) + rhs__(3,2)
|
||||
u(4,i,j,k+2) = u(4,i,j,k+2) + rhs__(4,2)
|
||||
u(5,i,j,k+2) = u(5,i,j,k+2) + rhs__(5,2)
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ end region
|
||||
if (timeron) call timer_stop(t_zsolve)
|
||||
|
||||
return
|
||||
end
|
||||
@@ -0,0 +1,338 @@
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
subroutine z_solve
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c this function performs the solution of the approximate factorization
|
||||
c step in the z-direction for all five matrix components
|
||||
c simultaneously. The Thomas algorithm is employed to solve the
|
||||
c systems for the z-lines. Boundary conditions are non-periodic
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
include 'header.h'
|
||||
|
||||
integer i, j, k, k1, k2, m
|
||||
double precision ru1, fac1, fac2
|
||||
double precision lhs__(5,0:2), lhsm__(5,0:2), lhsp__(5,0:2)
|
||||
double precision t1,t2,t3,ac,xvel,yvel,zvel,btuz,ac2u,uzik1
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
c---------------------------------------------------------------------
|
||||
c Prepare for z-solve, array redistribution
|
||||
c---------------------------------------------------------------------
|
||||
|
||||
if (timeron) call timer_start(t_zsolve)
|
||||
|
||||
!DVM$ region local(lhs)
|
||||
|
||||
!DVM$ parallel (k,j,i) on rhs(*,i,j,k)
|
||||
!DVM$& ,private(m,k1,k2,ru1,fac1,fac2,k,lhs__,lhsp__,lhsm__)
|
||||
!DVM$& ,ACROSS(OUT:rhs(0:0,0:0,0:0,0:2), lhs(0:0,0:0,0:0,0:0,0:2))
|
||||
!DVM$& ,stage(stage_n)
|
||||
do k = 0, problem_size-1
|
||||
do j = 1, ny2
|
||||
do i = 1, nx2
|
||||
if (k .eq. 0) then
|
||||
lhs__(1,0) = 0.0d0
|
||||
lhsp__(1,0) = 0.0d0
|
||||
lhsm__(1,0) = 0.0d0
|
||||
|
||||
lhs__(2,0) = 0.0d0
|
||||
lhsp__(2,0) = 0.0d0
|
||||
lhsm__(2,0) = 0.0d0
|
||||
|
||||
lhs__(3,0) = 1.0d0
|
||||
lhsp__(3,0) = 1.0d0
|
||||
lhsm__(3,0) = 1.0d0
|
||||
|
||||
lhs__(4,0) = 0.0d0
|
||||
lhsp__(4,0) = 0.0d0
|
||||
lhsm__(4,0) = 0.0d0
|
||||
|
||||
lhs__(5,0) = 0.0d0
|
||||
lhsp__(5,0) = 0.0d0
|
||||
lhsm__(5,0) = 0.0d0
|
||||
|
||||
|
||||
lhs__(1,1) = 0.0d0
|
||||
ru1 = c3c4*1.0d0/u(1,i,j,k)
|
||||
ru1 = dmax1(dz4 + con43 * ru1,
|
||||
> dz5 + c1c5 * ru1,
|
||||
> dzmax + ru1,
|
||||
> dz1)
|
||||
lhs__(2,1) = - dttz2 * ws(i,j,k) - dttz1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,j,k+1)
|
||||
ru1 = dmax1(dz4 + con43 * ru1,
|
||||
> dz5 + c1c5 * ru1,
|
||||
> dzmax + ru1,
|
||||
> dz1)
|
||||
lhs__(3,1) = 1.0d0 + c2dttz1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,j,k+2)
|
||||
ru1 = dmax1(dz4 + con43 * ru1,
|
||||
> dz5 + c1c5 * ru1,
|
||||
> dzmax + ru1,
|
||||
> dz1)
|
||||
lhs__(4,1) = dttz2 * ws(i,j,k+2) - dttz1 * ru1
|
||||
lhs__(5,1) = 0.0d0
|
||||
|
||||
lhs__(3,1) = lhs__(3,1) + comz5
|
||||
lhs__(4,1) = lhs__(4,1) - comz4
|
||||
lhs__(5,1) = lhs__(5,1) + comz1
|
||||
|
||||
lhsp__(1,1) = lhs__(1,1)
|
||||
lhsp__(2,1) = lhs__(2,1) - dttz2 * speed(i,j,k)
|
||||
lhsp__(3,1) = lhs__(3,1)
|
||||
lhsp__(4,1) = lhs__(4,1) + dttz2 * speed(i,j,k+2)
|
||||
lhsp__(5,1) = lhs__(5,1)
|
||||
lhsm__(1,1) = lhs__(1,1)
|
||||
lhsm__(2,1) = lhs__(2,1) + dttz2 * speed(i,j,k)
|
||||
lhsm__(3,1) = lhs__(3,1)
|
||||
lhsm__(4,1) = lhs__(4,1) - dttz2 * speed(i,j,k+2)
|
||||
lhsm__(5,1) = lhs__(5,1)
|
||||
else
|
||||
do m = 1, 5
|
||||
lhs__(m,0) = lhs(0,m,i,j,k)
|
||||
lhsp__(m,0) = lhs(1,m,i,j,k)
|
||||
lhsm__(m,0) = lhs(2,m,i,j,k)
|
||||
|
||||
lhs__(m,1) = lhs(0,m,i,j,k+1)
|
||||
lhsp__(m,1) = lhs(1,m,i,j,k+1)
|
||||
lhsm__(m,1) = lhs(2,m,i,j,k+1)
|
||||
enddo
|
||||
endif
|
||||
|
||||
if(k + 2 .lt. nz2 + 1) then
|
||||
m = k + 2
|
||||
lhs__(1,2) = 0.0d0
|
||||
|
||||
ru1 = c3c4*1.0d0/u(1,i,j,m-1)
|
||||
ru1 = dmax1(dz4 + con43 * ru1,
|
||||
> dz5 + c1c5 * ru1,
|
||||
> dzmax + ru1,
|
||||
> dz1)
|
||||
lhs__(2,2) = - dttz2 * ws(i,j,m-1) - dttz1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,j,m)
|
||||
ru1 = dmax1(dz4 + con43 * ru1,
|
||||
> dz5 + c1c5 * ru1,
|
||||
> dzmax + ru1,
|
||||
> dz1)
|
||||
lhs__(3,2) = 1.0d0 + c2dttz1 * ru1
|
||||
ru1 = c3c4*1.0d0/u(1,i,j,m+1)
|
||||
ru1 = dmax1(dz4 + con43 * ru1,
|
||||
> dz5 + c1c5 * ru1,
|
||||
> dzmax + ru1,
|
||||
> dz1)
|
||||
lhs__(4,2) = dttz2 * ws(i,j,m+1) - dttz1 * ru1
|
||||
lhs__(5,2) = 0.0d0
|
||||
|
||||
if(m .eq. 1) then
|
||||
lhs__(3,2) = lhs__(3,2) + comz5
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .eq. 2) then
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .ge. 3 .and. m .le. nz2-2) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
lhs__(5,2) = lhs__(5,2) + comz1
|
||||
else if(m .eq. nz2-1) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz6
|
||||
lhs__(4,2) = lhs__(4,2) - comz4
|
||||
else if(m .eq. nz2) then
|
||||
lhs__(1,2) = lhs__(1,2) + comz1
|
||||
lhs__(2,2) = lhs__(2,2) - comz4
|
||||
lhs__(3,2) = lhs__(3,2) + comz5
|
||||
endif
|
||||
|
||||
lhsp__(1,2) = lhs__(1,2)
|
||||
lhsp__(2,2) = lhs__(2,2) - dttz2 * speed(i,j,m-1)
|
||||
lhsp__(3,2) = lhs__(3,2)
|
||||
lhsp__(4,2) = lhs__(4,2) + dttz2 * speed(i,j,m+1)
|
||||
lhsp__(5,2) = lhs__(5,2)
|
||||
lhsm__(1,2) = lhs__(1,2)
|
||||
lhsm__(2,2) = lhs__(2,2) + dttz2 * speed(i,j,m-1)
|
||||
lhsm__(3,2) = lhs__(3,2)
|
||||
lhsm__(4,2) = lhs__(4,2) - dttz2 * speed(i,j,m+1)
|
||||
lhsm__(5,2) = lhs__(5,2)
|
||||
else if(k + 2 .eq. nz2+1) then
|
||||
|
||||
lhs__(1,2) = 0.0d0
|
||||
lhsp__(1,2) = 0.0d0
|
||||
lhsm__(1,2) = 0.0d0
|
||||
|
||||
lhs__(2,2) = 0.0d0
|
||||
lhsp__(2,2) = 0.0d0
|
||||
lhsm__(2,2) = 0.0d0
|
||||
|
||||
lhs__(3,2) = 1.0d0
|
||||
lhsp__(3,2) = 1.0d0
|
||||
lhsm__(3,2) = 1.0d0
|
||||
|
||||
lhs__(4,2) = 0.0d0
|
||||
lhsp__(4,2) = 0.0d0
|
||||
lhsm__(4,2) = 0.0d0
|
||||
|
||||
lhs__(5,2) = 0.0d0
|
||||
lhsp__(5,2) = 0.0d0
|
||||
lhsm__(5,2) = 0.0d0
|
||||
|
||||
endif
|
||||
!********************************** end of init
|
||||
|
||||
k1 = k + 1
|
||||
k2 = k + 2
|
||||
fac1 = 1.d0/lhs__(3,0)
|
||||
lhs__(4,0) = fac1*lhs__(4,0)
|
||||
lhs__(5,0) = fac1*lhs__(5,0)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
end do
|
||||
|
||||
if(k .le. nz2-1) then
|
||||
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
|
||||
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
|
||||
lhs__(2,2)=lhs__(2,2)-lhs__(1,2)*lhs__(4,0)
|
||||
lhs__(3,2)=lhs__(3,2)-lhs__(1,2)*lhs__(5,0)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k1)=rhs(m,i,j,k1)-lhs__(2,1)*rhs(m,i,j,k)
|
||||
rhs(m,i,j,k2)=rhs(m,i,j,k2)-lhs__(1,2)*rhs(m,i,j,k)
|
||||
end do
|
||||
|
||||
else
|
||||
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
|
||||
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
|
||||
if (lhs__(3,1) .ne. 0) then
|
||||
fac2 = 1.d0/lhs__(3,1)
|
||||
else
|
||||
fac2 = 0
|
||||
endif
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k1) = rhs(m,i,j,k1)-lhs__(2,1)*rhs(m,i,j,k)
|
||||
rhs(m,i,j,k1) = fac2*rhs(m,i,j,k1)
|
||||
end do
|
||||
endif
|
||||
|
||||
m = 4
|
||||
fac1 = 1.d0/lhsp__(3,0)
|
||||
lhsp__(4,0) = fac1*lhsp__(4,0)
|
||||
lhsp__(5,0) = fac1*lhsp__(5,0)
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
lhsp__(3,1)=lhsp__(3,1)-lhsp__(2,1)*lhsp__(4,0)
|
||||
lhsp__(4,1)=lhsp__(4,1)-lhsp__(2,1)*lhsp__(5,0)
|
||||
rhs(m,i,j,k1) = rhs(m,i,j,k1)-lhsp__(2,1)*rhs(m,i,j,k)
|
||||
if(k .lt. nz2) then
|
||||
lhsp__(2,2)=lhsp__(2,2)-lhsp__(1,2)*lhsp__(4,0)
|
||||
lhsp__(3,2)=lhsp__(3,2)-lhsp__(1,2)*lhsp__(5,0)
|
||||
rhs(m,i,j,k2)=rhs(m,i,j,k2)-lhsp__(1,2)*rhs(m,i,j,k)
|
||||
endif
|
||||
m = 5
|
||||
fac1 = 1.d0/lhsm__(3,0)
|
||||
lhsm__(4,0) = fac1*lhsm__(4,0)
|
||||
lhsm__(5,0) = fac1*lhsm__(5,0)
|
||||
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
|
||||
lhsm__(3,1)=lhsm__(3,1)-lhsm__(2,1)*lhsm__(4,0)
|
||||
lhsm__(4,1)=lhsm__(4,1)-lhsm__(2,1)*lhsm__(5,0)
|
||||
rhs(m,i,j,k1)=rhs(m,i,j,k1) -lhsm__(2,1)*rhs(m,i,j,k)
|
||||
if(k .lt. nz2) then
|
||||
lhsm__(2,2)=lhsm__(2,2)-lhsm__(1,2)*lhsm__(4,0)
|
||||
lhsm__(3,2)=lhsm__(3,2)-lhsm__(1,2)*lhsm__(5,0)
|
||||
rhs(m,i,j,k2)=rhs(m,i,j,k2) -lhsm__(1,2)*rhs(m,i,j,k)
|
||||
endif
|
||||
|
||||
if(k .eq. nz2) then
|
||||
rhs(4,i,j,k1) = rhs(4,i,j,k1)/lhsp__(3,1)
|
||||
rhs(5,i,j,k1) = rhs(5,i,j,k1)/lhsm__(3,1)
|
||||
do m = 1, 3
|
||||
rhs(m,i,j,k) = rhs(m,i,j,k)-lhs__(4,0)*rhs(m,i,j,k1)
|
||||
end do
|
||||
rhs(4,i,j,k) = rhs(4,i,j,k)-lhsp__(4,0)*rhs(4,i,j,k1)
|
||||
rhs(5,i,j,k) = rhs(5,i,j,k)-lhsm__(4,0)*rhs(5,i,j,k1)
|
||||
endif
|
||||
|
||||
do m = 1,5
|
||||
lhs(0,m,i,j,k) = lhs__(m,0)
|
||||
lhs(1,m,i,j,k) = lhsp__(m,0)
|
||||
lhs(2,m,i,j,k) = lhsm__(m,0)
|
||||
|
||||
lhs(0,m,i,j,k+1) = lhs__(m,1)
|
||||
lhs(1,m,i,j,k+1) = lhsp__(m,1)
|
||||
lhs(2,m,i,j,k+1) = lhsm__(m,1)
|
||||
if (k .lt. nz2) then
|
||||
lhs(0,m,i,j,k+2) = lhs__(m,2)
|
||||
lhs(1,m,i,j,k+2) = lhsp__(m,2)
|
||||
lhs(2,m,i,j,k+2) = lhsm__(m,2)
|
||||
endif
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ parallel (k,j,i) on rhs(*,i,j,k),ACROSS(rhs(0:0,0:0,0:0,0:2))
|
||||
!DVM$& ,stage(stage_n)
|
||||
do k = problem_size-3, 0, -1
|
||||
do j = 1, ny2
|
||||
do i = 1, nx2
|
||||
rhs(1,i,j,k) = rhs(1,i,j,k) -
|
||||
& lhs(0,4,i,j,k)*rhs(1,i,j,k+1) -
|
||||
& lhs(0,5,i,j,k)*rhs(1,i,j,k+2)
|
||||
rhs(2,i,j,k) = rhs(2,i,j,k) -
|
||||
& lhs(0,4,i,j,k)*rhs(2,i,j,k+1) -
|
||||
& lhs(0,5,i,j,k)*rhs(2,i,j,k+2)
|
||||
rhs(3,i,j,k) = rhs(3,i,j,k) -
|
||||
& lhs(0,4,i,j,k)*rhs(3,i,j,k+1) -
|
||||
& lhs(0,5,i,j,k)*rhs(3,i,j,k+2)
|
||||
|
||||
rhs(4,i,j,k) = rhs(4,i,j,k) -
|
||||
& lhs(1,4,i,j,k)*rhs(4,i,j,k+1) -
|
||||
& lhs(1,5,i,j,k)*rhs(4,i,j,k+2)
|
||||
rhs(5,i,j,k) = rhs(5,i,j,k) -
|
||||
& lhs(2,4,i,j,k)*rhs(5,i,j,k+1) -
|
||||
& lhs(2,5,i,j,k)*rhs(5,i,j,k+2)
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
|
||||
!DVM$ parallel (k,j,i) on u(*,i,j,k)
|
||||
!DVM$& ,private(t1,t2,t3,ac,xvel,yvel,zvel,btuz,ac2u,uzik1)
|
||||
do k = 1, nz2
|
||||
do j = 1, ny2
|
||||
do i = 1, nx2
|
||||
xvel = us(i,j,k)
|
||||
yvel = vs(i,j,k)
|
||||
zvel = ws(i,j,k)
|
||||
ac = speed(i,j,k)
|
||||
ac2u = ac*ac
|
||||
uzik1 = u(1,i,j,k)
|
||||
btuz = bt * uzik1
|
||||
t1 = btuz/ac * (rhs(4,i,j,k) + rhs(5,i,j,k))
|
||||
t2 = rhs(3,i,j,k) + t1
|
||||
t3 = btuz * (rhs(4,i,j,k) - rhs(5,i,j,k))
|
||||
|
||||
u(1,i,j,k) = u(1,i,j,k) + t2
|
||||
u(2,i,j,k) = u(2,i,j,k)-uzik1*rhs(2,i,j,k)+xvel*t2
|
||||
u(3,i,j,k) = u(3,i,j,k)+uzik1*rhs(1,i,j,k)+yvel*t2
|
||||
u(4,i,j,k) = u(4,i,j,k)+ zvel*t2 + t3
|
||||
u(5,i,j,k) = u(5,i,j,k)+ uzik1*(-xvel*rhs(2,i,j,k) +
|
||||
& yvel*rhs(1,i,j,k)) + qs(i,j,k)*t2 +
|
||||
& c2iv*ac2u*t1 + zvel*t3
|
||||
enddo
|
||||
enddo
|
||||
enddo
|
||||
!DVM$ end region
|
||||
if (timeron) call timer_stop(t_zsolve)
|
||||
|
||||
return
|
||||
end
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user