added tests

This commit is contained in:
ALEXks
2024-05-02 17:08:55 +03:00
parent d0d629eeb8
commit 94570a414b
431 changed files with 248194 additions and 1 deletions

View File

@@ -0,0 +1,66 @@
SHELL=/bin/sh
BENCHMARK=bt
BENCHMARKU=BT
include ../config/make.def
include ../sys/make.common
SOURCES = bt.fdv \
set_constants.fdv \
initialize.fdv \
exact_solution.fdv \
verify.fdv \
compute_errors.fdv \
timers.fdv \
print_result.fdv
SOURCES_MPI = z_solve_mpi.fdv y_solve_mpi.fdv x_solve_mpi.fdv compute_rhs_mpi.fdv exact_rhs.fdv
SOURCES_SINGLE = z_solve.fdv y_solve.fdv x_solve.fdv compute_rhs.fdv exact_rhs.fdv
SOURCES_BLOCK = z_solve_block.fdv y_solve_block.fdv x_solve_block.fdv compute_rhs_block.fdv exact_rhs_block.fdv
SOURCES_BLOCK1 = z_solve_block.fdv y_solve_block.fdv x_solve_block.fdv compute_rhs.fdv exact_rhs.fdv
SOURCES_BLOCK2 = z_solve_block.fdv y_solve_block.fdv x_solve_block.fdv compute_rhs_block2.fdv exact_rhs.fdv
OBJS = ${SOURCES:.fdv=.o}
OBJS_SINGLE = ${SOURCES_SINGLE:.fdv=.o}
OBJS_MPI = ${SOURCES_MPI:.fdv=.o}
OBJS_BLOCK = ${SOURCES_BLOCK:.fdv=.o}
OBJS_BLOCK1 = ${SOURCES_BLOCK1:.fdv=.o}
OBJS_BLOCK2 = ${SOURCES_BLOCK2:.fdv=.o}
${PROGRAM}: config
@if [ "$(VERSION)" = "MPI" ] ; then \
${MAKE} MPI_VER; \
else \
if [ "$(VERSION)" = "BLOCK" ] ; then \
${MAKE} BLOCK_VER; \
else \
if [ "$(VERSION)" = "BLOCK1" ] ; then \
${MAKE} BLOCK_VER1; \
else \
${MAKE} SINGLE_VER;\
fi \
fi \
fi
MPI_VER: $(OBJS) $(OBJS_MPI)
${FLINK} -o ${PROGRAM} ${OBJS} $(OBJS_MPI)
SINGLE_VER: $(OBJS) $(OBJS_SINGLE)
${FLINK} -o ${PROGRAM} ${OBJS} $(OBJS_SINGLE)
BLOCK_VER: $(OBJS) $(OBJS_BLOCK)
${FLINK} -o ${PROGRAM} ${OBJS} $(OBJS_BLOCK)
BLOCK_VER1: $(OBJS) $(OBJS_BLOCK1)
${FLINK} -o ${PROGRAM} ${OBJS} $(OBJS_BLOCK1)
BLOCK_VER2: $(OBJS) $(OBJS_BLOCK2)
${FLINK} -o ${PROGRAM} ${OBJS} $(OBJS_BLOCK2)
%.o: %.fdv npbparams.h header3d.h
${F77} ${FFLAGS} -c -o $@ $<
clean:
rm -f npbparams.h
rm -f *.o *~
rm -f *.cu *.cuf *.c *.f

View File

@@ -0,0 +1,12 @@
@echo off
set CLASS=%1
set OPT=%2
CALL ..\sys\setparams BT %CLASS%
CALL %F77% %OPT% bt 1>out_%CLASS%.txt 2>err_%CLASS%.txt
if exist bt.exe (
copy bt.exe %BIN%\bt.%CLASS%.x.exe
del bt.exe
)

View File

@@ -0,0 +1,120 @@
!---------------------------------------------------------------------
program btdv3
include 'header3d.h'
integer i,niter,step,fstatus,n3
double precision navg,mflops
external timer_read,verify
double precision tmax,timer_read
logical verified
character class
!---------------------------------------------------------------------
! Root node reads input file (if it exists) else takes
! defaults from parameters
!---------------------------------------------------------------------
write (unit = *,fmt = 1000)
open (unit = 2,file = 'inputbt.data',status = 'old',iostat = fstat
&us)
if (fstatus .eq. 0) then
write (unit = *,fmt = 233)
233 format(' Reading from input file inputbt.data')
read (unit = 2,fmt = *) niter
read (unit = 2,fmt = *) dt
read (unit = 2,fmt = *) grid_points(1),grid_points(2),grid_poin
&ts(3)
close (unit = 2)
else
write (unit = *,fmt = 234)
niter = niter_default
dt = dt_default
grid_points(1) = problem_size
grid_points(2) = problem_size
grid_points(3) = problem_size
endif
234 format(' No input file inputbt.data. Using compiled defaults')
write (unit = *,fmt = 1001) grid_points(1),grid_points(2),grid_poi
&nts(3)
write (unit = *,fmt = 1002) niter,dt
1000 format(//, ' NAS Parallel Benchmarks 3.3.1 - DVMH version',' - BT
&Benchmark ',/)
1001 format(' Size: ', i3, 'x', i3, 'x', i3)
1002 format(' Iterations: ', i3, ' dt: ', F10.6)
if (grid_points(1) .gt. imax .or. grid_points(2) .gt. jmax .or. gr
&id_points(3) .gt. kmax) then
print *, (grid_points(i), i = 1,3)
print *, ' Problem size too big for compiled array sizes'
goto 999
endif
open (unit = 2,file = 'inputStage',status = 'old',iostat = fstat
&us)
if (fstatus .eq. 0) then
read (unit = 2,fmt = *) stage_n
close (unit = 2)
else
stage_n = 0
endif
write(*,*) 'stage = ', stage_n
call set_constants()
call initialize()
call exact_rhs()
! ************* DO 2 iterations for touch all code
call adi_first
call adi_first
call initialize
call timer_clear(1)
call timer_start(1)
do step = 1,niter
if (mod (step,20) .eq. 0 .or. step .eq. 1) then
write (unit = *,fmt = 200) step
200 format(' Time step ', i8)
endif
call adi()
enddo
call timer_stop(1)
tmax = timer_read (1)
call verify(niter,class,verified)
n3 = grid_points(1) * grid_points(2) * grid_points(3)
navg = (grid_points(1) + grid_points(2) + grid_points(3)) / 3.0
if (tmax .ne. 0.) then
mflops = 1.0e-6 * float (niter) * (3478.8 * float (n3) - 17655.
&7 * navg** 2 + 28023.7 * navg) / tmax
else
mflops = 0.0
endif
call print_results('BT',class,grid_points(1),grid_points(2),grid_p
&oints(3),niter,tmax,mflops,' floating point',verified,npb
&version)
! ,compiletime, cs1, cs2, cs3, cs4, cs5,cs6, '(none)')
999 continue
end
subroutine adi_first()
call compute_rhs()
call x_solve()
call y_solve()
call z_solve()
return
end
subroutine adi ()
!DVM$ interval 1
call compute_rhs()
!DVM$ end interval
!DVM$ interval 11
call x_solve()
!DVM$ end interval
!DVM$ interval 12
call y_solve()
!DVM$ end interval
!DVM$ interval 13
call z_solve()
!DVM$ end interval
return
end

View File

@@ -0,0 +1,117 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! this function computes the norm of the difference between the
! computed solution and the exact solution
!---------------------------------------------------------------------
subroutine error_norm (rms)
include 'header3d.h'
integer i,j,k,m,d
double precision xi,eta,zeta,u_exact(5),rms(5),add
double precision r1,r2,r3,r4,r5
do m = 1,5
rms(m) = 0.0d0
enddo
r1 = 0.0d0
r2 = 0.0d0
r3 = 0.0d0
r4 = 0.0d0
r5 = 0.0d0
!DVM$ region
!DVM$ PARALLEL (k,j,i) ON u(*,i,j,k),
!DVM$& REDUCTION(SUM(r1),SUM(r2),SUM(r3),SUM(r4),SUM(r5)),
!DVM$&private(u_exact,xi,eta,zeta,m,add)
do k = 0,problem_size - 1
do j = 0,problem_size - 1
do i = 0,problem_size - 1
zeta = dble (k) * dnzm1
eta = dble (j) * dnym1
xi = dble (i) * dnxm1
! call exact_solution(xi, eta, zeta, u_exact)
do m = 1,5
u_exact(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) +
& xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6
&) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta *
&(ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
enddo
add = u(1,i,j,k) - u_exact(1)
r1 = r1 + add * add
add = u(2,i,j,k) - u_exact(2)
r2 = r2 + add * add
add = u(3,i,j,k) - u_exact(3)
r3 = r3 + add * add
add = u(4,i,j,k) - u_exact(4)
r4 = r4 + add * add
add = u(5,i,j,k) - u_exact(5)
r5 = r5 + add * add
enddo
enddo
enddo
!DVM$ end region
rms(1) = r1
rms(2) = r2
rms(3) = r3
rms(4) = r4
rms(5) = r5
do m = 1,5
do d = 1,3
rms(m) = rms(m) / dble (grid_points(d) - 2)
enddo
rms(m) = dsqrt (rms(m))
enddo
return
end
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine rhs_norm (rms)
include 'header3d.h'
integer i,j,k,d,m
double precision rms(5),add,r1,r2,r3,r4,r5
r1 = 0.0d0
r2 = 0.0d0
r3 = 0.0d0
r4 = 0.0d0
r5 = 0.0d0
!DVM$ region
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k),
!DVM$&REDUCTION(SUM(r1),SUM(r2),SUM(r3),SUM(r4),SUM(r5)),
!DVM$&private(add)
do k = 1,problem_size - 2
do j = 1,problem_size - 2
do i = 1,problem_size - 2
add = rhs(1,i,j,k)
r1 = r1 + add * add
add = rhs(2,i,j,k)
r2 = r2 + add * add
add = rhs(3,i,j,k)
r3 = r3 + add * add
add = rhs(4,i,j,k)
r4 = r4 + add * add
add = rhs(5,i,j,k)
r5 = r5 + add * add
enddo
enddo
enddo
!DVM$ end region
rms(1) = r1
rms(2) = r2
rms(3) = r3
rms(4) = r4
rms(5) = r5
do m = 1,5
do d = 1,3
rms(m) = rms(m) / dble (grid_points(d) - 2)
enddo
rms(m) = dsqrt (rms(m))
enddo
return
end

View File

@@ -0,0 +1,218 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine compute_rhs ()
include 'header3d.h'
integer i,j,k,m
double precision rho_inv,uijk,up1,um1,vijk,vp1,vm1,wijk,wp1,wm1,r
&hs_(5)
!DVM$ region out(rho_i, us, vs, ws, qs, square)
!DVM$ PARALLEL (k,j,i) ON us(i,j,k), SHADOW_COMPUTE,
!DVM$& PRIVATE(rho_inv,m),cuda_block(128)
do k = 0,problem_size - 1
do j = 0,problem_size - 1
do i = 0,problem_size - 1
rho_inv = 1.0d0 / u(1,i,j,k)
rho_i(i,j,k) = rho_inv
us(i,j,k) = u(2,i,j,k) * rho_inv
vs(i,j,k) = u(3,i,j,k) * rho_inv
ws(i,j,k) = u(4,i,j,k) * rho_inv
square(i,j,k) = 0.5d0 * (u(2,i,j,k) * u(2,i,j,k) + u(3,i,
&j,k) * u(3,i,j,k) + u(4,i,j,k) * u(4,i,j,k)) * rho_inv
qs(i,j,k) = square(i,j,k) * rho_inv
do m = 1,5
rhs(m,i,j,k) = forcing(m,i,j,k)
enddo
enddo
enddo
enddo
!---------------------------------------------------------------------
! compute xi-direction fluxes
!---------------------------------------------------------------------
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), PRIVATE(uijk,up1,um1,m,
!DVM$&vijk,vp1,vm1,wijk,wp1,wm1,rhs_),cuda_block(32)
do k = 1,problem_size - 2
do j = 1,problem_size - 2
do i = 1,problem_size - 2
uijk = us(i,j,k)
up1 = us(i + 1,j,k)
um1 = us(i - 1,j,k)
rhs_(1) = forcing(1,i,j,k)
rhs_(2) = forcing(2,i,j,k)
rhs_(3) = forcing(3,i,j,k)
rhs_(4) = forcing(4,i,j,k)
rhs_(5) = forcing(5,i,j,k)
rhs_(1) = rhs_(1) + dx1tx1 * (u(1,i + 1,j,k) - 2.0d0 * u(
&1,i,j,k) + u(1,i - 1,j,k)) - tx2 * (u(2,i + 1,j,k) - u(2,i - 1,j,k
&))
rhs_(2) = rhs_(2) + dx2tx1 * (u(2,i + 1,j,k) - 2.0d0 * u(
&2,i,j,k) + u(2,i - 1,j,k)) + xxcon2 * con43 * (up1 - 2.0d0 * uijk
&+ um1) - tx2 * (u(2,i + 1,j,k) * up1 - u(2,i - 1,j,k) * um1 + (u(5
&,i + 1,j,k) - square(i + 1,j,k) - u(5,i - 1,j,k) + square(i - 1,j,
&k)) * c2)
rhs_(3) = rhs_(3) + dx3tx1 * (u(3,i + 1,j,k) - 2.0d0 * u(
&3,i,j,k) + u(3,i - 1,j,k)) + xxcon2 * (vs(i + 1,j,k) - 2.0d0 * vs(
&i,j,k) + vs(i - 1,j,k)) - tx2 * (u(3,i + 1,j,k) * up1 - u(3,i - 1,
&j,k) * um1)
rhs_(4) = rhs_(4) + dx4tx1 * (u(4,i + 1,j,k) - 2.0d0 * u(
&4,i,j,k) + u(4,i - 1,j,k)) + xxcon2 * (ws(i + 1,j,k) - 2.0d0 * ws(
&i,j,k) + ws(i - 1,j,k)) - tx2 * (u(4,i + 1,j,k) * up1 - u(4,i - 1,
&j,k) * um1)
rhs_(5) = rhs_(5) + dx5tx1 * (u(5,i + 1,j,k) - 2.0d0 * u(
&5,i,j,k) + u(5,i - 1,j,k)) + xxcon3 * (qs(i + 1,j,k) - 2.0d0 * qs(
&i,j,k) + qs(i - 1,j,k)) + xxcon4 * (up1 * up1 - 2.0d0 * uijk * uij
&k + um1 * um1) + xxcon5 * (u(5,i + 1,j,k) * rho_i(i + 1,j,k) - 2.0
&d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i - 1,j,k) * rho_i(i - 1,j,k)
&) - tx2 * ((c1 * u(5,i + 1,j,k) - c2 * square(i + 1,j,k)) * up1 -
&(c1 * u(5,i - 1,j,k) - c2 * square(i - 1,j,k)) * um1)
if (i .eq. 1) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
&0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k))
enddo
else if (i .eq. 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i - 1,
&j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k
&))
enddo
else if (i .ge. 3 .and. i .le. problem_size - 4) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
&* u(m,i - 1,j,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m
&,i + 2,j,k))
enddo
else if (i .eq. problem_size - 3) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
&* u(m,i - 1,j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k))
enddo
else if (i .eq. problem_size - 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.d0 *
& u(m,i - 1,j,k) + 5.d0 * u(m,i,j,k))
enddo
endif
vijk = vs(i,j,k)
vp1 = vs(i,j + 1,k)
vm1 = vs(i,j - 1,k)
rhs_(1) = rhs_(1) + dy1ty1 * (u(1,i,j + 1,k) - 2.0d0 * u(
&1,i,j,k) + u(1,i,j - 1,k)) - ty2 * (u(3,i,j + 1,k) - u(3,i,j - 1,k
&))
rhs_(2) = rhs_(2) + dy2ty1 * (u(2,i,j + 1,k) - 2.0d0 * u(
&2,i,j,k) + u(2,i,j - 1,k)) + yycon2 * (us(i,j + 1,k) - 2.0d0 * us(
&i,j,k) + us(i,j - 1,k)) - ty2 * (u(2,i,j + 1,k) * vp1 - u(2,i,j -
&1,k) * vm1)
rhs_(3) = rhs_(3) + dy3ty1 * (u(3,i,j + 1,k) - 2.0d0 * u(
&3,i,j,k) + u(3,i,j - 1,k)) + yycon2 * con43 * (vp1 - 2.0d0 * vijk
&+ vm1) - ty2 * (u(3,i,j + 1,k) * vp1 - u(3,i,j - 1,k) * vm1 + (u(5
&,i,j + 1,k) - square(i,j + 1,k) - u(5,i,j - 1,k) + square(i,j - 1,
&k)) * c2)
rhs_(4) = rhs_(4) + dy4ty1 * (u(4,i,j + 1,k) - 2.0d0 * u(
&4,i,j,k) + u(4,i,j - 1,k)) + yycon2 * (ws(i,j + 1,k) - 2.0d0 * ws(
&i,j,k) + ws(i,j - 1,k)) - ty2 * (u(4,i,j + 1,k) * vp1 - u(4,i,j -
&1,k) * vm1)
rhs_(5) = rhs_(5) + dy5ty1 * (u(5,i,j + 1,k) - 2.0d0 * u(
&5,i,j,k) + u(5,i,j - 1,k)) + yycon3 * (qs(i,j + 1,k) - 2.0d0 * qs(
&i,j,k) + qs(i,j - 1,k)) + yycon4 * (vp1 * vp1 - 2.0d0 * vijk * vij
&k + vm1 * vm1) + yycon5 * (u(5,i,j + 1,k) * rho_i(i,j + 1,k) - 2.0
&d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i,j - 1,k) * rho_i(i,j - 1,k)
&) - ty2 * ((c1 * u(5,i,j + 1,k) - c2 * square(i,j + 1,k)) * vp1 -
&(c1 * u(5,i,j - 1,k) - c2 * square(i,j - 1,k)) * vm1)
if (j .eq. 1) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
&0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k))
enddo
else if (j .eq. 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j -
&1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k
&))
enddo
else if (j .ge. 3 .and. j .le. problem_size - 4) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
&* u(m,i,j - 1,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m
&,i,j + 2,k))
enddo
else if (j .eq. problem_size - 3) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
&* u(m,i,j - 1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k))
enddo
else if (j .eq. problem_size - 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.d0 *
& u(m,i,j - 1,k) + 5.d0 * u(m,i,j,k))
enddo
endif
wijk = ws(i,j,k)
wp1 = ws(i,j,k + 1)
wm1 = ws(i,j,k - 1)
rhs_(1) = rhs_(1) + dz1tz1 * (u(1,i,j,k + 1) - 2.0d0 * u(
&1,i,j,k) + u(1,i,j,k - 1)) - tz2 * (u(4,i,j,k + 1) - u(4,i,j,k - 1
&))
rhs_(2) = rhs_(2) + dz2tz1 * (u(2,i,j,k + 1) - 2.0d0 * u(
&2,i,j,k) + u(2,i,j,k - 1)) + zzcon2 * (us(i,j,k + 1) - 2.0d0 * us(
&i,j,k) + us(i,j,k - 1)) - tz2 * (u(2,i,j,k + 1) * wp1 - u(2,i,j,k
&- 1) * wm1)
rhs_(3) = rhs_(3) + dz3tz1 * (u(3,i,j,k + 1) - 2.0d0 * u(
&3,i,j,k) + u(3,i,j,k - 1)) + zzcon2 * (vs(i,j,k + 1) - 2.0d0 * vs(
&i,j,k) + vs(i,j,k - 1)) - tz2 * (u(3,i,j,k + 1) * wp1 - u(3,i,j,k
&- 1) * wm1)
rhs_(4) = rhs_(4) + dz4tz1 * (u(4,i,j,k + 1) - 2.0d0 * u(
&4,i,j,k) + u(4,i,j,k - 1)) + zzcon2 * con43 * (wp1 - 2.0d0 * wijk
&+ wm1) - tz2 * (u(4,i,j,k + 1) * wp1 - u(4,i,j,k - 1) * wm1 + (u(5
&,i,j,k + 1) - square(i,j,k + 1) - u(5,i,j,k - 1) + square(i,j,k -
&1)) * c2)
rhs_(5) = rhs_(5) + dz5tz1 * (u(5,i,j,k + 1) - 2.0d0 * u(
&5,i,j,k) + u(5,i,j,k - 1)) + zzcon3 * (qs(i,j,k + 1) - 2.0d0 * qs(
&i,j,k) + qs(i,j,k - 1)) + zzcon4 * (wp1 * wp1 - 2.0d0 * wijk * wij
&k + wm1 * wm1) + zzcon5 * (u(5,i,j,k + 1) * rho_i(i,j,k + 1) - 2.0
&d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i,j,k - 1) * rho_i(i,j,k - 1)
&) - tz2 * ((c1 * u(5,i,j,k + 1) - c2 * square(i,j,k + 1)) * wp1 -
&(c1 * u(5,i,j,k - 1) - c2 * square(i,j,k - 1)) * wm1)
if (k .eq. 1) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
&0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2))
enddo
else if (k .eq. 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j,k
&- 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2
&))
enddo
else if (k .ge. 3 .and. k .le. problem_size - 4) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
&* u(m,i,j,k - 1) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m
&,i,j,k + 2))
enddo
else if (k .eq. problem_size - 3) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
&* u(m,i,j,k - 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1))
enddo
else if (k .eq. problem_size - 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.d0 *
& u(m,i,j,k - 1) + 5.d0 * u(m,i,j,k))
enddo
endif
rhs(1,i,j,k) = rhs_(1) * dt
rhs(2,i,j,k) = rhs_(2) * dt
rhs(3,i,j,k) = rhs_(3) * dt
rhs(4,i,j,k) = rhs_(4) * dt
rhs(5,i,j,k) = rhs_(5) * dt
enddo
enddo
enddo
!DVM$ end region
return
end

View File

@@ -0,0 +1,484 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine compute_rhs ()
include 'header3d.h'
integer i,j,k,m,z
double precision rho_inv,uijk,up1,um1,vijk,vp1,vm1,wijk,wp1,wm1,r
&hs_(5),s1,s2,s3,s4,s5,s6,s7,qs1,qs2,qs3,qs4,qs5,qs6,qs7
double precision ue_(-2:2, 5), buf_(-2:2, 5),cuf_(-2:2),q_(-2:2)
double precision dtemp(5), xi, eta, zeta, dtpp
!DVM$ region
!DVM$ PARALLEL (k,j,i) ON us(i,j,k),PRIVATE(m),cuda_block(128)
do k = 0,problem_size - 1
do j = 0,problem_size - 1
do i = 0,problem_size - 1
do m = 1,5
rhs(m,i,j,k) = 0
enddo
enddo
enddo
enddo
!---------------------------------------------------------------------
! compute xi-direction fluxes
!---------------------------------------------------------------------
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), PRIVATE(uijk,up1,um1,m,
!DVM$&vijk,vp1,vm1,wijk,wp1,wm1,rhs_,s1,s2,s3,s4,s5,s6,s7,
!DVM$&qs1,qs2,qs3,qs4,qs5,qs6,qs7,
!DVM$&zeta,eta,xi,dtemp,buf_,cuf_,q_,dtpp,z,ue_),cuda_block(32)
do k = 1,problem_size - 2
do j = 1,problem_size - 2
do i = 1,problem_size - 2
rhs_(1) = 0
rhs_(2) = 0
rhs_(3) = 0
rhs_(4) = 0
rhs_(5) = 0
zeta = dble(k) * dnzm1
eta = dble(j) * dnym1
do z = -2, 2
xi = dble(i + z) * dnxm1
do m = 1, 5
dtemp(m) = ce(m,1) +
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
do m = 1, 5
ue_(z,m) = dtemp(m)
end do
dtpp = 1.0d0 / dtemp(1)
do m = 2, 5
buf_(z, m) = dtpp * dtemp(m)
end do
cuf_(z) = buf_(z,2) * buf_(z,2)
buf_(z,1) = cuf_(z) + buf_(z,3) * buf_(z,3) +
> buf_(z,4) * buf_(z,4)
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)*
> ue_(z,3) + buf_(z,4)*ue_(z,4))
enddo
rhs_(1) = rhs_(1) -
> tx2*( ue_(1,2)-ue_(-1,2) )+
> dx1tx1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
rhs_(2) = rhs_(2) - tx2 * (
> (ue_(1,2)*buf_(1,2)+c2*(ue_(1,5)-q_(1)))-
> (ue_(-1,2)*buf_(-1,2)+c2*(ue_(-1,5)-q_(-1))))+
> xxcon1*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
> dx2tx1*( ue_(1,2)-2.0d0* ue_(0,2)+ue_(-1,2))
rhs_(3) = rhs_(3) - tx2 * (
> ue_(1,3)*buf_(1,2)-ue_(-1,3)*buf_(-1,2))+
> xxcon2*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
> dx3tx1*( ue_(1,3)-2.0d0*ue_(0,3) +ue_(-1,3))
rhs_(4) = rhs_(4) - tx2*(
> ue_(1,4)*buf_(1,2)-ue_(-1,4)*buf_(-1,2))+
> xxcon2*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
> dx4tx1*( ue_(1,4)-2.0d0* ue_(0,4)+ ue_(-1,4))
rhs_(5) = rhs_(5) - tx2*(
> buf_(1,2)*(c1*ue_(1,5)-c2*q_(1))-
> buf_(-1,2)*(c1*ue_(-1,5)-c2*q_(-1)))+
> 0.5d0*xxcon3*(buf_(1,1)-2.0d0*buf_(0,1)+
> buf_(-1,1))+
> xxcon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
> xxcon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
> dx5tx1*( ue_(1,5)-2.0d0* ue_(0,5)+ ue_(-1,5))
do m = 1, 5
if(i .eq. 1) then
rhs_(m) = rhs_(m) - dssp *
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
else if(i .eq. 2) then
rhs_(m) = rhs_(m) - dssp *
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
> 4.0d0*ue_(1,m) + ue_(2,m))
else if(i .eq. problem_size-3) then
rhs_(m) = rhs_(m) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
else if(i .eq. problem_size-2) then
rhs_(m) = rhs_(m) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
else
rhs_(m) = rhs_(m) - dssp*
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
endif
end do
zeta = dble(k) * dnzm1
xi = dble(i) * dnxm1
do z = -2, 2
eta = dble(j + z) * dnym1
do m = 1, 5
dtemp(m) = ce(m,1) +
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
do m = 1, 5
ue_(z,m) = dtemp(m)
end do
dtpp = 1.0d0 / dtemp(1)
do m = 2, 5
buf_(z, m) = dtpp * dtemp(m)
end do
cuf_(z) = buf_(z,3) * buf_(z,3)
buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) +
> buf_(z,4) * buf_(z,4)
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)
> *ue_(z,3) + buf_(z,4) * ue_(z,4))
enddo
rhs_(1) = rhs_(1) -
> ty2*( ue_(1,3)-ue_(-1,3) )+
> dy1ty1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
rhs_(2) = rhs_(2) - ty2*(
> ue_(1,2)*buf_(1,3)-ue_(-1,2)*buf_(-1,3))+
> yycon2*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
> dy2ty1*( ue_(1,2)-2.0* ue_(0,2)+ ue_(-1,2))
rhs_(3) = rhs_(3) - ty2*(
> (ue_(1,3)*buf_(1,3)+c2*(ue_(1,5)-q_(1)))-
> (ue_(-1,3)*buf_(-1,3)+c2*(ue_(-1,5)-q_(-1))))+
> yycon1*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
> dy3ty1*( ue_(1,3)-2.0d0*ue_(0,3) +ue_(-1,3))
rhs_(4) = rhs_(4) - ty2*(
> ue_(1,4)*buf_(1,3)-ue_(-1,4)*buf_(-1,3))+
> yycon2*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
> dy4ty1*( ue_(1,4)-2.0d0*ue_(0,4)+ ue_(-1,4))
rhs_(5) = rhs_(5) - ty2*(
> buf_(1,3)*(c1*ue_(1,5)-c2*q_(1))-
> buf_(-1,3)*(c1*ue_(-1,5)-c2*q_(-1)))+
> 0.5d0*yycon3*(buf_(1,1)-2.0d0*buf_(0,1)+
> buf_(-1,1))+
> yycon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
> yycon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
> dy5ty1*(ue_(1,5)-2.0d0*ue_(0,5)+ue_(-1,5))
do m = 1, 5
if(j .eq. 1) then
rhs_(m) = rhs_(m) - dssp *
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
else if(j .eq. 2) then
rhs_(m) = rhs_(m) - dssp *
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
> 4.0d0*ue_(1,m) + ue_(2,m))
else if(j .eq. problem_size-3) then
rhs_(m) = rhs_(m) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
else if(j .eq. problem_size-2) then
rhs_(m) = rhs_(m) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
else
rhs_(m) = rhs_(m) - dssp*
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
endif
end do
xi = dble(i) * dnxm1
eta = dble(j) * dnym1
do z = -2, 2
zeta = dble(k + z) * dnzm1
do m = 1, 5
dtemp(m) = ce(m,1) +
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
do m = 1, 5
ue_(z,m) = dtemp(m)
end do
dtpp = 1.0d0 / dtemp(1)
do m = 2, 5
buf_(z, m) = dtpp * dtemp(m)
end do
cuf_(z) = buf_(z,4) * buf_(z,4)
buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) +
> buf_(z,3) * buf_(z,3)
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)*
> ue_(z,3) + buf_(z,4)*ue_(z,4))
enddo
rhs_(1) = rhs_(1) -
> tz2*( ue_(1,4)-ue_(-1,4) )+
> dz1tz1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
rhs_(2) = rhs_(2) - tz2 * (
> ue_(1,2)*buf_(1,4)-ue_(-1,2)*buf_(-1,4))+
> zzcon2*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
> dz2tz1*( ue_(1,2)-2.0d0* ue_(0,2)+ ue_(-1,2))
rhs_(3) = rhs_(3) - tz2 * (
> ue_(1,3)*buf_(1,4)-ue_(-1,3)*buf_(-1,4))+
> zzcon2*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
> dz3tz1*(ue_(1,3)-2.0d0*ue_(0,3)+ue_(-1,3))
rhs_(4) = rhs_(4) - tz2 * (
> (ue_(1,4)*buf_(1,4)+c2*(ue_(1,5)-q_(1)))-
> (ue_(-1,4)*buf_(-1,4)+c2*(ue_(-1,5)-q_(-1))))+
> zzcon1*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
> dz4tz1*( ue_(1,4)-2.0d0*ue_(0,4) +ue_(-1,4))
rhs_(5) = rhs_(5) - tz2 * (
> buf_(1,4)*(c1*ue_(1,5)-c2*q_(1))-
> buf_(-1,4)*(c1*ue_(-1,5)-c2*q_(-1)))+
> 0.5d0*zzcon3*(buf_(1,1)-2.0d0*buf_(0,1)
> +buf_(-1,1))+
> zzcon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
> zzcon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
> dz5tz1*( ue_(1,5)-2.0d0*ue_(0,5)+ ue_(-1,5))
do m = 1, 5
if(k .eq. 1) then
rhs_(m) = rhs_(m) - dssp *
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
else if(k .eq. 2) then
rhs_(m) = rhs_(m) - dssp *
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
> 4.0d0*ue_(1,m) + ue_(2,m))
else if(k .eq. problem_size-3) then
rhs_(m) = rhs_(m) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
else if(k .eq. problem_size-2) then
rhs_(m) = rhs_(m) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
else
rhs_(m) = rhs_(m) - dssp*
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
endif
end do
do m = 1, 5
rhs_(m) = -1.d0 * rhs_(m)
end do
uijk = u(2,i,j,k) / u(1,i,j,k)
up1 = u(2,i + 1,j,k) / u(1,i + 1,j,k)
um1 = u(2,i - 1,j,k) / u(1,i - 1,j,k)
vijk = u(3,i,j,k) / u(1,i,j,k)
vp1 = u(3,i,j + 1,k) / u(1,i,j + 1,k)
vm1 = u(3,i,j - 1,k) / u(1,i,j - 1,k)
wijk = u(4,i,j,k) / u(1,i,j,k)
wp1 = u(4,i,j,k + 1) / u(1,i,j,k + 1)
wm1 = u(4,i,j,k - 1) / u(1,i,j,k - 1)
s1 = 0.5d0 * (u(2,i,j,k) * u(2,i,j,k) + u(3,i,
&j,k) * u(3,i,j,k) + u(4,i,j,k) * u(4,i,j,k)) / u(1,i,j,k)
s2 = 0.5d0 * (u(2,i+1,j,k) * u(2,i+1,j,k) + u(3,i
&+1,j,k) * u(3,i+1,j,k) + u(4,i+1,j,k) * u(4,i+1,j,k)) /
&u(1,i+1,j,k)
s3 = 0.5d0 * (u(2,i-1,j,k) * u(2,i-1,j,k) + u(3,i
&-1,j,k) * u(3,i-1,j,k) + u(4,i-1,j,k) * u(4,i-1,j,k)) /
&u(1,i-1,j,k)
s4 = 0.5d0 * (u(2,i,j+1,k) * u(2,i,j+1,k) + u(3,i,
&j+1,k) * u(3,i,j+1,k) + u(4,i,j+1,k) * u(4,i,j+1,k)) /
&u(1,i,j+1,k)
s5 = 0.5d0 * (u(2,i,j-1,k) * u(2,i,j-1,k) + u(3,i,
&j-1,k) * u(3,i,j-1,k) + u(4,i,j-1,k) * u(4,i,j-1,k)) /
&u(1,i,j-1,k)
s6 = 0.5d0 * (u(2,i,j,k+1) * u(2,i,j,k+1) + u(3,i,
&j,k+1) * u(3,i,j,k+1) + u(4,i,j,k+1) * u(4,i,j,k+1)) /
&u(1,i,j,k+1)
s7 = 0.5d0 * (u(2,i,j,k-1) * u(2,i,j,k-1) + u(3,i,
&j,k-1) * u(3,i,j,k-1) + u(4,i,j,k-1) * u(4,i,j,k-1)) /
&u(1,i,j,k-1)
qs1 = s1 / u(1,i,j,k)
qs2 = s2 / u(1,i+1,j,k)
qs3 = s3 / u(1,i-1,j,k)
qs4 = s4 / u(1,i,j+1,k)
qs5 = s5 / u(1,i,j-1,k)
qs6 = s6 / u(1,i,j,k+1)
qs7 = s7 / u(1,i,j,k-1)
! rhs_(1) = forcing(1,i,j,k)
! rhs_(2) = forcing(2,i,j,k)
! rhs_(3) = forcing(3,i,j,k)
! rhs_(4) = forcing(4,i,j,k)
! rhs_(5) = forcing(5,i,j,k)
rhs_(1) = rhs_(1) + dx1tx1 * (u(1,i + 1,j,k) - 2.0d0 * u(
&1,i,j,k) + u(1,i - 1,j,k)) - tx2 * (u(2,i + 1,j,k) - u(2,i - 1,j,k
&))
rhs_(2) = rhs_(2) + dx2tx1 * (u(2,i + 1,j,k) - 2.0d0 * u(
&2,i,j,k) + u(2,i - 1,j,k)) + xxcon2 * con43 * (up1 - 2.0d0 * uijk
&+ um1) - tx2 * (u(2,i + 1,j,k) * up1 - u(2,i - 1,j,k) * um1 + (u(5
&,i + 1,j,k) - s2 - u(5,i - 1,j,k) + s3) * c2)
rhs_(3) = rhs_(3) + dx3tx1 * (u(3,i + 1,j,k) - 2.0d0 * u(
&3,i,j,k) + u(3,i - 1,j,k)) + xxcon2 * (u(3,i + 1,j,k) /
&u(1,i + 1,j,k) - 2.0d0 * vijk + u(3,i - 1,j,k)/u(1,i - 1,j,k))
&- tx2 * (u(3,i + 1,j,k) * up1 - u(3,i - 1,j,k) * um1)
rhs_(4) = rhs_(4) + dx4tx1 * (u(4,i + 1,j,k) - 2.0d0 * u(
&4,i,j,k) + u(4,i - 1,j,k)) + xxcon2 * (u(4,i + 1,j,k) /
&u(1,i + 1,j,k) - 2.0d0 * wijk + u(4,i - 1,j,k) / u(1,i - 1,j,k))
&- tx2 * (u(4,i + 1,j,k) * up1 - u(4,i - 1,j,k) * um1)
rhs_(5) = rhs_(5) + dx5tx1 * (u(5,i + 1,j,k) - 2.0d0 * u(
&5,i,j,k) + u(5,i - 1,j,k)) + xxcon3 * (qs2 - 2.0d0 * qs1 +
&qs3) + xxcon4 * (up1 * up1 - 2.0d0 * uijk * uij
&k + um1 * um1) + xxcon5 * (u(5,i + 1,j,k) * 1.0d0 / u(1,i + 1,j,k)
& - 2.0d0 * u(5,i,j,k) * 1.0d0 / u(1,i,j,k) + u(5,i - 1,j,k) *
&1.0d0 / u(1,i - 1,j,k)) - tx2 * ((c1 * u(5,i + 1,j,k) -
&c2 * s2) * up1 -
&(c1 * u(5,i - 1,j,k) - c2 * s3) * um1)
if (i .eq. 1) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
&0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k))
enddo
else if (i .eq. 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i - 1,
&j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k
&))
enddo
else if (i .ge. 3 .and. i .le. problem_size - 4) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
&* u(m,i - 1,j,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m
&,i + 2,j,k))
enddo
else if (i .eq. problem_size - 3) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
&* u(m,i - 1,j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k))
enddo
else if (i .eq. problem_size - 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.d0 *
& u(m,i - 1,j,k) + 5.d0 * u(m,i,j,k))
enddo
endif
rhs_(1) = rhs_(1) + dy1ty1 * (u(1,i,j + 1,k) - 2.0d0 * u(
&1,i,j,k) + u(1,i,j - 1,k)) - ty2 * (u(3,i,j + 1,k) - u(3,i,j - 1,k
&))
rhs_(2) = rhs_(2) + dy2ty1 * (u(2,i,j + 1,k) - 2.0d0 * u(
&2,i,j,k) + u(2,i,j - 1,k)) + yycon2 * (u(2,i,j + 1,k) /
&u(1,i,j + 1,k) - 2.0d0 * uijk + u(2,i,j - 1,k)/u(1,i,j - 1,k))
&- ty2 * (u(2,i,j + 1,k) * vp1 - u(2,i,j -1,k) * vm1)
rhs_(3) = rhs_(3) + dy3ty1 * (u(3,i,j + 1,k) - 2.0d0 * u(
&3,i,j,k) + u(3,i,j - 1,k)) + yycon2 * con43 * (vp1 - 2.0d0 * vijk
&+ vm1) - ty2 * (u(3,i,j + 1,k) * vp1 - u(3,i,j - 1,k) * vm1 + (u(5
&,i,j + 1,k) - s4 - u(5,i,j - 1,k) + s5) * c2)
rhs_(4) = rhs_(4) + dy4ty1 * (u(4,i,j + 1,k) - 2.0d0 * u(
&4,i,j,k) + u(4,i,j - 1,k)) + yycon2 * (u(4,i,j + 1,k) /
&u(1,i,j + 1,k) - 2.0d0 * wijk + u(4,i,j - 1,k) / u(1,i,j - 1,k))
&- ty2 * (u(4,i,j + 1,k) * vp1 - u(4,i,j - 1,k) * vm1)
rhs_(5) = rhs_(5) + dy5ty1 * (u(5,i,j + 1,k) - 2.0d0 * u(
&5,i,j,k) + u(5,i,j - 1,k)) + yycon3 * (qs4 - 2.0d0 * qs1
& + qs5) + yycon4 * (vp1 * vp1 - 2.0d0 * vijk * vij
&k + vm1 * vm1) + yycon5 * (u(5,i,j + 1,k) * 1.0d0 / u(1,i,j + 1,k)
&- 2.0d0 * u(5,i,j,k) * 1.0d0 / u(1,i,j,k) + u(5,i,j - 1,k) *
&1.0d0 / u(1,i,j - 1,k)
&) - ty2 * ((c1 * u(5,i,j + 1,k) - c2 * s4) * vp1 -
&(c1 * u(5,i,j - 1,k) - c2 * s5) * vm1)
if (j .eq. 1) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
&0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k))
enddo
else if (j .eq. 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j -
&1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k
&))
enddo
else if (j .ge. 3 .and. j .le. problem_size - 4) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
&* u(m,i,j - 1,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m
&,i,j + 2,k))
enddo
else if (j .eq. problem_size - 3) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
&* u(m,i,j - 1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k))
enddo
else if (j .eq. problem_size - 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.d0 *
& u(m,i,j - 1,k) + 5.d0 * u(m,i,j,k))
enddo
endif
rhs_(1) = rhs_(1) + dz1tz1 * (u(1,i,j,k + 1) - 2.0d0 * u(
&1,i,j,k) + u(1,i,j,k - 1)) - tz2 * (u(4,i,j,k + 1) - u(4,i,j,k - 1
&))
rhs_(2) = rhs_(2) + dz2tz1 * (u(2,i,j,k + 1) - 2.0d0 * u(
&2,i,j,k) + u(2,i,j,k - 1)) + zzcon2 * (u(2,i,j,k + 1) /
&u(1,i,j,k + 1) - 2.0d0 * uijk + u(2,i,j,k - 1) / u(1,i,j,k - 1))
&- tz2 * (u(2,i,j,k + 1) * wp1 - u(2,i,j,k - 1) * wm1)
rhs_(3) = rhs_(3) + dz3tz1 * (u(3,i,j,k + 1) - 2.0d0 * u(
&3,i,j,k) + u(3,i,j,k - 1)) + zzcon2 * (u(3,i,j,k + 1) /
&u(1,i,j,k + 1) - 2.0d0 * vijk + u(3,i,j,k - 1) / u(1,i,j,k - 1))
&- tz2 * (u(3,i,j,k + 1) * wp1 - u(3,i,j,k - 1) * wm1)
rhs_(4) = rhs_(4) + dz4tz1 * (u(4,i,j,k + 1) - 2.0d0 * u(
&4,i,j,k) + u(4,i,j,k - 1)) + zzcon2 * con43 * (wp1 - 2.0d0 * wijk
&+ wm1) - tz2 * (u(4,i,j,k + 1) * wp1 - u(4,i,j,k - 1) * wm1 + (u(5
&,i,j,k + 1) - s6 - u(5,i,j,k - 1) + s7) * c2)
rhs_(5) = rhs_(5) + dz5tz1 * (u(5,i,j,k + 1) - 2.0d0 * u(
&5,i,j,k) + u(5,i,j,k - 1)) + zzcon3 * (qs6 - 2.0d0 * qs1 +
&qs7) + zzcon4 * (wp1 * wp1 - 2.0d0 * wijk * wij
&k + wm1 * wm1) + zzcon5 * (u(5,i,j,k + 1) * 1.0d0 / u(1,i,j,k+1)
&- 2.0d0 * u(5,i,j,k) * 1.0d0 / u(1,i,j,k) + u(5,i,j,k - 1) *
&1.0d0 / u(1,i,j,k-1)
&) - tz2 * ((c1 * u(5,i,j,k + 1) - c2 * s6) * wp1 -
&(c1 * u(5,i,j,k - 1) - c2 * s7) * wm1)
if (k .eq. 1) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
&0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2))
enddo
else if (k .eq. 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j,k
&- 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2
&))
enddo
else if (k .ge. 3 .and. k .le. problem_size - 4) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
&* u(m,i,j,k - 1) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m
&,i,j,k + 2))
enddo
else if (k .eq. problem_size - 3) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
&* u(m,i,j,k - 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1))
enddo
else if (k .eq. problem_size - 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.d0 *
& u(m,i,j,k - 1) + 5.d0 * u(m,i,j,k))
enddo
endif
rhs(1,i,j,k) = rhs_(1) * dt
rhs(2,i,j,k) = rhs_(2) * dt
rhs(3,i,j,k) = rhs_(3) * dt
rhs(4,i,j,k) = rhs_(4) * dt
rhs(5,i,j,k) = rhs_(5) * dt
enddo
enddo
enddo
!DVM$ end region
return
end

View File

@@ -0,0 +1,247 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine compute_rhs ()
include 'header3d.h'
integer i,j,k,m,z
double precision rho_inv,uijk,up1,um1,vijk,vp1,vm1,wijk,wp1,wm1,r
&hs_(5),s1,s2,s3,s4,s5,s6,s7,qs1,qs2,qs3,qs4,qs5,qs6,qs7
!DVM$ region
!DVM$ PARALLEL (k,j,i) ON us(i,j,k),PRIVATE(m),cuda_block(128)
do k = 0,problem_size - 1
do j = 0,problem_size - 1
do i = 0,problem_size - 1
do m = 1,5
rhs(m,i,j,k) = 0
enddo
enddo
enddo
enddo
!---------------------------------------------------------------------
! compute xi-direction fluxes
!---------------------------------------------------------------------
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), PRIVATE(uijk,up1,um1,m,
!DVM$&vijk,vp1,vm1,wijk,wp1,wm1,rhs_,s1,s2,s3,s4,s5,s6,s7,
!DVM$&qs1,qs2,qs3,qs4,qs5,qs6,qs7),cuda_block(32)
do k = 1,problem_size - 2
do j = 1,problem_size - 2
do i = 1,problem_size - 2
do m = 1, 5
rhs_(m) = forcing(m,i,j,k)
end do
uijk = u(2,i,j,k) / u(1,i,j,k)
up1 = u(2,i + 1,j,k) / u(1,i + 1,j,k)
um1 = u(2,i - 1,j,k) / u(1,i - 1,j,k)
vijk = u(3,i,j,k) / u(1,i,j,k)
vp1 = u(3,i,j + 1,k) / u(1,i,j + 1,k)
vm1 = u(3,i,j - 1,k) / u(1,i,j - 1,k)
wijk = u(4,i,j,k) / u(1,i,j,k)
wp1 = u(4,i,j,k + 1) / u(1,i,j,k + 1)
wm1 = u(4,i,j,k - 1) / u(1,i,j,k - 1)
s1 = 0.5d0 * (u(2,i,j,k) * u(2,i,j,k) + u(3,i,
&j,k) * u(3,i,j,k) + u(4,i,j,k) * u(4,i,j,k)) / u(1,i,j,k)
s2 = 0.5d0 * (u(2,i+1,j,k) * u(2,i+1,j,k) + u(3,i
&+1,j,k) * u(3,i+1,j,k) + u(4,i+1,j,k) * u(4,i+1,j,k)) /
&u(1,i+1,j,k)
s3 = 0.5d0 * (u(2,i-1,j,k) * u(2,i-1,j,k) + u(3,i
&-1,j,k) * u(3,i-1,j,k) + u(4,i-1,j,k) * u(4,i-1,j,k)) /
&u(1,i-1,j,k)
s4 = 0.5d0 * (u(2,i,j+1,k) * u(2,i,j+1,k) + u(3,i,
&j+1,k) * u(3,i,j+1,k) + u(4,i,j+1,k) * u(4,i,j+1,k)) /
&u(1,i,j+1,k)
s5 = 0.5d0 * (u(2,i,j-1,k) * u(2,i,j-1,k) + u(3,i,
&j-1,k) * u(3,i,j-1,k) + u(4,i,j-1,k) * u(4,i,j-1,k)) /
&u(1,i,j-1,k)
s6 = 0.5d0 * (u(2,i,j,k+1) * u(2,i,j,k+1) + u(3,i,
&j,k+1) * u(3,i,j,k+1) + u(4,i,j,k+1) * u(4,i,j,k+1)) /
&u(1,i,j,k+1)
s7 = 0.5d0 * (u(2,i,j,k-1) * u(2,i,j,k-1) + u(3,i,
&j,k-1) * u(3,i,j,k-1) + u(4,i,j,k-1) * u(4,i,j,k-1)) /
&u(1,i,j,k-1)
qs1 = s1 / u(1,i,j,k)
qs2 = s2 / u(1,i+1,j,k)
qs3 = s3 / u(1,i-1,j,k)
qs4 = s4 / u(1,i,j+1,k)
qs5 = s5 / u(1,i,j-1,k)
qs6 = s6 / u(1,i,j,k+1)
qs7 = s7 / u(1,i,j,k-1)
! rhs_(1) = forcing(1,i,j,k)
! rhs_(2) = forcing(2,i,j,k)
! rhs_(3) = forcing(3,i,j,k)
! rhs_(4) = forcing(4,i,j,k)
! rhs_(5) = forcing(5,i,j,k)
rhs_(1) = rhs_(1) + dx1tx1 * (u(1,i + 1,j,k) - 2.0d0 * u(
&1,i,j,k) + u(1,i - 1,j,k)) - tx2 * (u(2,i + 1,j,k) - u(2,i - 1,j,k
&))
rhs_(2) = rhs_(2) + dx2tx1 * (u(2,i + 1,j,k) - 2.0d0 * u(
&2,i,j,k) + u(2,i - 1,j,k)) + xxcon2 * con43 * (up1 - 2.0d0 * uijk
&+ um1) - tx2 * (u(2,i + 1,j,k) * up1 - u(2,i - 1,j,k) * um1 + (u(5
&,i + 1,j,k) - s2 - u(5,i - 1,j,k) + s3) * c2)
rhs_(3) = rhs_(3) + dx3tx1 * (u(3,i + 1,j,k) - 2.0d0 * u(
&3,i,j,k) + u(3,i - 1,j,k)) + xxcon2 * (u(3,i + 1,j,k) /
&u(1,i + 1,j,k) - 2.0d0 * vijk + u(3,i - 1,j,k)/u(1,i - 1,j,k))
&- tx2 * (u(3,i + 1,j,k) * up1 - u(3,i - 1,j,k) * um1)
rhs_(4) = rhs_(4) + dx4tx1 * (u(4,i + 1,j,k) - 2.0d0 * u(
&4,i,j,k) + u(4,i - 1,j,k)) + xxcon2 * (u(4,i + 1,j,k) /
&u(1,i + 1,j,k) - 2.0d0 * wijk + u(4,i - 1,j,k) / u(1,i - 1,j,k))
&- tx2 * (u(4,i + 1,j,k) * up1 - u(4,i - 1,j,k) * um1)
rhs_(5) = rhs_(5) + dx5tx1 * (u(5,i + 1,j,k) - 2.0d0 * u(
&5,i,j,k) + u(5,i - 1,j,k)) + xxcon3 * (qs2 - 2.0d0 * qs1 +
&qs3) + xxcon4 * (up1 * up1 - 2.0d0 * uijk * uij
&k + um1 * um1) + xxcon5 * (u(5,i + 1,j,k) * 1.0d0 / u(1,i + 1,j,k)
& - 2.0d0 * u(5,i,j,k) * 1.0d0 / u(1,i,j,k) + u(5,i - 1,j,k) *
&1.0d0 / u(1,i - 1,j,k)) - tx2 * ((c1 * u(5,i + 1,j,k) -
&c2 * s2) * up1 -
&(c1 * u(5,i - 1,j,k) - c2 * s3) * um1)
if (i .eq. 1) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
&0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k))
enddo
else if (i .eq. 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i - 1,
&j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k
&))
enddo
else if (i .ge. 3 .and. i .le. problem_size - 4) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
&* u(m,i - 1,j,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m
&,i + 2,j,k))
enddo
else if (i .eq. problem_size - 3) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
&* u(m,i - 1,j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k))
enddo
else if (i .eq. problem_size - 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.d0 *
& u(m,i - 1,j,k) + 5.d0 * u(m,i,j,k))
enddo
endif
rhs_(1) = rhs_(1) + dy1ty1 * (u(1,i,j + 1,k) - 2.0d0 * u(
&1,i,j,k) + u(1,i,j - 1,k)) - ty2 * (u(3,i,j + 1,k) - u(3,i,j - 1,k
&))
rhs_(2) = rhs_(2) + dy2ty1 * (u(2,i,j + 1,k) - 2.0d0 * u(
&2,i,j,k) + u(2,i,j - 1,k)) + yycon2 * (u(2,i,j + 1,k) /
&u(1,i,j + 1,k) - 2.0d0 * uijk + u(2,i,j - 1,k)/u(1,i,j - 1,k))
&- ty2 * (u(2,i,j + 1,k) * vp1 - u(2,i,j -1,k) * vm1)
rhs_(3) = rhs_(3) + dy3ty1 * (u(3,i,j + 1,k) - 2.0d0 * u(
&3,i,j,k) + u(3,i,j - 1,k)) + yycon2 * con43 * (vp1 - 2.0d0 * vijk
&+ vm1) - ty2 * (u(3,i,j + 1,k) * vp1 - u(3,i,j - 1,k) * vm1 + (u(5
&,i,j + 1,k) - s4 - u(5,i,j - 1,k) + s5) * c2)
rhs_(4) = rhs_(4) + dy4ty1 * (u(4,i,j + 1,k) - 2.0d0 * u(
&4,i,j,k) + u(4,i,j - 1,k)) + yycon2 * (u(4,i,j + 1,k) /
&u(1,i,j + 1,k) - 2.0d0 * wijk + u(4,i,j - 1,k) / u(1,i,j - 1,k))
&- ty2 * (u(4,i,j + 1,k) * vp1 - u(4,i,j - 1,k) * vm1)
rhs_(5) = rhs_(5) + dy5ty1 * (u(5,i,j + 1,k) - 2.0d0 * u(
&5,i,j,k) + u(5,i,j - 1,k)) + yycon3 * (qs4 - 2.0d0 * qs1
& + qs5) + yycon4 * (vp1 * vp1 - 2.0d0 * vijk * vij
&k + vm1 * vm1) + yycon5 * (u(5,i,j + 1,k) * 1.0d0 / u(1,i,j + 1,k)
&- 2.0d0 * u(5,i,j,k) * 1.0d0 / u(1,i,j,k) + u(5,i,j - 1,k) *
&1.0d0 / u(1,i,j - 1,k)
&) - ty2 * ((c1 * u(5,i,j + 1,k) - c2 * s4) * vp1 -
&(c1 * u(5,i,j - 1,k) - c2 * s5) * vm1)
if (j .eq. 1) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
&0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k))
enddo
else if (j .eq. 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j -
&1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k
&))
enddo
else if (j .ge. 3 .and. j .le. problem_size - 4) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
&* u(m,i,j - 1,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m
&,i,j + 2,k))
enddo
else if (j .eq. problem_size - 3) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
&* u(m,i,j - 1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k))
enddo
else if (j .eq. problem_size - 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.d0 *
& u(m,i,j - 1,k) + 5.d0 * u(m,i,j,k))
enddo
endif
rhs_(1) = rhs_(1) + dz1tz1 * (u(1,i,j,k + 1) - 2.0d0 * u(
&1,i,j,k) + u(1,i,j,k - 1)) - tz2 * (u(4,i,j,k + 1) - u(4,i,j,k - 1
&))
rhs_(2) = rhs_(2) + dz2tz1 * (u(2,i,j,k + 1) - 2.0d0 * u(
&2,i,j,k) + u(2,i,j,k - 1)) + zzcon2 * (u(2,i,j,k + 1) /
&u(1,i,j,k + 1) - 2.0d0 * uijk + u(2,i,j,k - 1) / u(1,i,j,k - 1))
&- tz2 * (u(2,i,j,k + 1) * wp1 - u(2,i,j,k - 1) * wm1)
rhs_(3) = rhs_(3) + dz3tz1 * (u(3,i,j,k + 1) - 2.0d0 * u(
&3,i,j,k) + u(3,i,j,k - 1)) + zzcon2 * (u(3,i,j,k + 1) /
&u(1,i,j,k + 1) - 2.0d0 * vijk + u(3,i,j,k - 1) / u(1,i,j,k - 1))
&- tz2 * (u(3,i,j,k + 1) * wp1 - u(3,i,j,k - 1) * wm1)
rhs_(4) = rhs_(4) + dz4tz1 * (u(4,i,j,k + 1) - 2.0d0 * u(
&4,i,j,k) + u(4,i,j,k - 1)) + zzcon2 * con43 * (wp1 - 2.0d0 * wijk
&+ wm1) - tz2 * (u(4,i,j,k + 1) * wp1 - u(4,i,j,k - 1) * wm1 + (u(5
&,i,j,k + 1) - s6 - u(5,i,j,k - 1) + s7) * c2)
rhs_(5) = rhs_(5) + dz5tz1 * (u(5,i,j,k + 1) - 2.0d0 * u(
&5,i,j,k) + u(5,i,j,k - 1)) + zzcon3 * (qs6 - 2.0d0 * qs1 +
&qs7) + zzcon4 * (wp1 * wp1 - 2.0d0 * wijk * wij
&k + wm1 * wm1) + zzcon5 * (u(5,i,j,k + 1) * 1.0d0 / u(1,i,j,k+1)
&- 2.0d0 * u(5,i,j,k) * 1.0d0 / u(1,i,j,k) + u(5,i,j,k - 1) *
&1.0d0 / u(1,i,j,k-1)
&) - tz2 * ((c1 * u(5,i,j,k + 1) - c2 * s6) * wp1 -
&(c1 * u(5,i,j,k - 1) - c2 * s7) * wm1)
if (k .eq. 1) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
&0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2))
enddo
else if (k .eq. 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j,k
&- 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2
&))
enddo
else if (k .ge. 3 .and. k .le. problem_size - 4) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
&* u(m,i,j,k - 1) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m
&,i,j,k + 2))
enddo
else if (k .eq. problem_size - 3) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
&* u(m,i,j,k - 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1))
enddo
else if (k .eq. problem_size - 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.d0 *
& u(m,i,j,k - 1) + 5.d0 * u(m,i,j,k))
enddo
endif
rhs(1,i,j,k) = rhs_(1) * dt
rhs(2,i,j,k) = rhs_(2) * dt
rhs(3,i,j,k) = rhs_(3) * dt
rhs(4,i,j,k) = rhs_(4) * dt
rhs(5,i,j,k) = rhs_(5) * dt
enddo
enddo
enddo
!DVM$ end region
return
end

View File

@@ -0,0 +1,219 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine compute_rhs ()
include 'header3d.h'
integer i,j,k,m
double precision rho_inv,uijk,up1,um1,vijk,vp1,vm1,wijk,wp1,wm1,r
&hs_(5)
!DVM$ region out(rho_i, us, vs, ws, qs, square)
!DVM$ PARALLEL (k,j,i) ON us(i,j,k), SHADOW_COMPUTE,
!DVM$& PRIVATE(rho_inv,m),cuda_block(128)
!DVM$& ,SHADOW_RENEW(u(0:0,2:2,2:2,2:2))
do k = 0,problem_size - 1
do j = 0,problem_size - 1
do i = 0,problem_size - 1
rho_inv = 1.0d0 / u(1,i,j,k)
rho_i(i,j,k) = rho_inv
us(i,j,k) = u(2,i,j,k) * rho_inv
vs(i,j,k) = u(3,i,j,k) * rho_inv
ws(i,j,k) = u(4,i,j,k) * rho_inv
square(i,j,k) = 0.5d0 * (u(2,i,j,k) * u(2,i,j,k) + u(3,i,
&j,k) * u(3,i,j,k) + u(4,i,j,k) * u(4,i,j,k)) * rho_inv
qs(i,j,k) = square(i,j,k) * rho_inv
do m = 1,5
rhs(m,i,j,k) = forcing(m,i,j,k)
enddo
enddo
enddo
enddo
!---------------------------------------------------------------------
! compute xi-direction fluxes
!---------------------------------------------------------------------
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), PRIVATE(uijk,up1,um1,m,
!DVM$&vijk,vp1,vm1,wijk,wp1,wm1,rhs_),cuda_block(32)
do k = 1,problem_size - 2
do j = 1,problem_size - 2
do i = 1,problem_size - 2
uijk = us(i,j,k)
up1 = us(i + 1,j,k)
um1 = us(i - 1,j,k)
rhs_(1) = forcing(1,i,j,k)
rhs_(2) = forcing(2,i,j,k)
rhs_(3) = forcing(3,i,j,k)
rhs_(4) = forcing(4,i,j,k)
rhs_(5) = forcing(5,i,j,k)
rhs_(1) = rhs_(1) + dx1tx1 * (u(1,i + 1,j,k) - 2.0d0 * u(
&1,i,j,k) + u(1,i - 1,j,k)) - tx2 * (u(2,i + 1,j,k) - u(2,i - 1,j,k
&))
rhs_(2) = rhs_(2) + dx2tx1 * (u(2,i + 1,j,k) - 2.0d0 * u(
&2,i,j,k) + u(2,i - 1,j,k)) + xxcon2 * con43 * (up1 - 2.0d0 * uijk
&+ um1) - tx2 * (u(2,i + 1,j,k) * up1 - u(2,i - 1,j,k) * um1 + (u(5
&,i + 1,j,k) - square(i + 1,j,k) - u(5,i - 1,j,k) + square(i - 1,j,
&k)) * c2)
rhs_(3) = rhs_(3) + dx3tx1 * (u(3,i + 1,j,k) - 2.0d0 * u(
&3,i,j,k) + u(3,i - 1,j,k)) + xxcon2 * (vs(i + 1,j,k) - 2.0d0 * vs(
&i,j,k) + vs(i - 1,j,k)) - tx2 * (u(3,i + 1,j,k) * up1 - u(3,i - 1,
&j,k) * um1)
rhs_(4) = rhs_(4) + dx4tx1 * (u(4,i + 1,j,k) - 2.0d0 * u(
&4,i,j,k) + u(4,i - 1,j,k)) + xxcon2 * (ws(i + 1,j,k) - 2.0d0 * ws(
&i,j,k) + ws(i - 1,j,k)) - tx2 * (u(4,i + 1,j,k) * up1 - u(4,i - 1,
&j,k) * um1)
rhs_(5) = rhs_(5) + dx5tx1 * (u(5,i + 1,j,k) - 2.0d0 * u(
&5,i,j,k) + u(5,i - 1,j,k)) + xxcon3 * (qs(i + 1,j,k) - 2.0d0 * qs(
&i,j,k) + qs(i - 1,j,k)) + xxcon4 * (up1 * up1 - 2.0d0 * uijk * uij
&k + um1 * um1) + xxcon5 * (u(5,i + 1,j,k) * rho_i(i + 1,j,k) - 2.0
&d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i - 1,j,k) * rho_i(i - 1,j,k)
&) - tx2 * ((c1 * u(5,i + 1,j,k) - c2 * square(i + 1,j,k)) * up1 -
&(c1 * u(5,i - 1,j,k) - c2 * square(i - 1,j,k)) * um1)
if (i .eq. 1) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
&0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k))
enddo
else if (i .eq. 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i - 1,
&j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m,i + 2,j,k
&))
enddo
else if (i .ge. 3 .and. i .le. problem_size - 4) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
&* u(m,i - 1,j,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k) + u(m
&,i + 2,j,k))
enddo
else if (i .eq. problem_size - 3) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.0d0
&* u(m,i - 1,j,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i + 1,j,k))
enddo
else if (i .eq. problem_size - 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i - 2,j,k) - 4.d0 *
& u(m,i - 1,j,k) + 5.d0 * u(m,i,j,k))
enddo
endif
vijk = vs(i,j,k)
vp1 = vs(i,j + 1,k)
vm1 = vs(i,j - 1,k)
rhs_(1) = rhs_(1) + dy1ty1 * (u(1,i,j + 1,k) - 2.0d0 * u(
&1,i,j,k) + u(1,i,j - 1,k)) - ty2 * (u(3,i,j + 1,k) - u(3,i,j - 1,k
&))
rhs_(2) = rhs_(2) + dy2ty1 * (u(2,i,j + 1,k) - 2.0d0 * u(
&2,i,j,k) + u(2,i,j - 1,k)) + yycon2 * (us(i,j + 1,k) - 2.0d0 * us(
&i,j,k) + us(i,j - 1,k)) - ty2 * (u(2,i,j + 1,k) * vp1 - u(2,i,j -
&1,k) * vm1)
rhs_(3) = rhs_(3) + dy3ty1 * (u(3,i,j + 1,k) - 2.0d0 * u(
&3,i,j,k) + u(3,i,j - 1,k)) + yycon2 * con43 * (vp1 - 2.0d0 * vijk
&+ vm1) - ty2 * (u(3,i,j + 1,k) * vp1 - u(3,i,j - 1,k) * vm1 + (u(5
&,i,j + 1,k) - square(i,j + 1,k) - u(5,i,j - 1,k) + square(i,j - 1,
&k)) * c2)
rhs_(4) = rhs_(4) + dy4ty1 * (u(4,i,j + 1,k) - 2.0d0 * u(
&4,i,j,k) + u(4,i,j - 1,k)) + yycon2 * (ws(i,j + 1,k) - 2.0d0 * ws(
&i,j,k) + ws(i,j - 1,k)) - ty2 * (u(4,i,j + 1,k) * vp1 - u(4,i,j -
&1,k) * vm1)
rhs_(5) = rhs_(5) + dy5ty1 * (u(5,i,j + 1,k) - 2.0d0 * u(
&5,i,j,k) + u(5,i,j - 1,k)) + yycon3 * (qs(i,j + 1,k) - 2.0d0 * qs(
&i,j,k) + qs(i,j - 1,k)) + yycon4 * (vp1 * vp1 - 2.0d0 * vijk * vij
&k + vm1 * vm1) + yycon5 * (u(5,i,j + 1,k) * rho_i(i,j + 1,k) - 2.0
&d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i,j - 1,k) * rho_i(i,j - 1,k)
&) - ty2 * ((c1 * u(5,i,j + 1,k) - c2 * square(i,j + 1,k)) * vp1 -
&(c1 * u(5,i,j - 1,k) - c2 * square(i,j - 1,k)) * vm1)
if (j .eq. 1) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
&0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k))
enddo
else if (j .eq. 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j -
&1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m,i,j + 2,k
&))
enddo
else if (j .ge. 3 .and. j .le. problem_size - 4) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
&* u(m,i,j - 1,k) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k) + u(m
&,i,j + 2,k))
enddo
else if (j .eq. problem_size - 3) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.0d0
&* u(m,i,j - 1,k) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j + 1,k))
enddo
else if (j .eq. problem_size - 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j - 2,k) - 4.d0 *
& u(m,i,j - 1,k) + 5.d0 * u(m,i,j,k))
enddo
endif
wijk = ws(i,j,k)
wp1 = ws(i,j,k + 1)
wm1 = ws(i,j,k - 1)
rhs_(1) = rhs_(1) + dz1tz1 * (u(1,i,j,k + 1) - 2.0d0 * u(
&1,i,j,k) + u(1,i,j,k - 1)) - tz2 * (u(4,i,j,k + 1) - u(4,i,j,k - 1
&))
rhs_(2) = rhs_(2) + dz2tz1 * (u(2,i,j,k + 1) - 2.0d0 * u(
&2,i,j,k) + u(2,i,j,k - 1)) + zzcon2 * (us(i,j,k + 1) - 2.0d0 * us(
&i,j,k) + us(i,j,k - 1)) - tz2 * (u(2,i,j,k + 1) * wp1 - u(2,i,j,k
&- 1) * wm1)
rhs_(3) = rhs_(3) + dz3tz1 * (u(3,i,j,k + 1) - 2.0d0 * u(
&3,i,j,k) + u(3,i,j,k - 1)) + zzcon2 * (vs(i,j,k + 1) - 2.0d0 * vs(
&i,j,k) + vs(i,j,k - 1)) - tz2 * (u(3,i,j,k + 1) * wp1 - u(3,i,j,k
&- 1) * wm1)
rhs_(4) = rhs_(4) + dz4tz1 * (u(4,i,j,k + 1) - 2.0d0 * u(
&4,i,j,k) + u(4,i,j,k - 1)) + zzcon2 * con43 * (wp1 - 2.0d0 * wijk
&+ wm1) - tz2 * (u(4,i,j,k + 1) * wp1 - u(4,i,j,k - 1) * wm1 + (u(5
&,i,j,k + 1) - square(i,j,k + 1) - u(5,i,j,k - 1) + square(i,j,k -
&1)) * c2)
rhs_(5) = rhs_(5) + dz5tz1 * (u(5,i,j,k + 1) - 2.0d0 * u(
&5,i,j,k) + u(5,i,j,k - 1)) + zzcon3 * (qs(i,j,k + 1) - 2.0d0 * qs(
&i,j,k) + qs(i,j,k - 1)) + zzcon4 * (wp1 * wp1 - 2.0d0 * wijk * wij
&k + wm1 * wm1) + zzcon5 * (u(5,i,j,k + 1) * rho_i(i,j,k + 1) - 2.0
&d0 * u(5,i,j,k) * rho_i(i,j,k) + u(5,i,j,k - 1) * rho_i(i,j,k - 1)
&) - tz2 * ((c1 * u(5,i,j,k + 1) - c2 * square(i,j,k + 1)) * wp1 -
&(c1 * u(5,i,j,k - 1) - c2 * square(i,j,k - 1)) * wm1)
if (k .eq. 1) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (5.0d0 * u(m,i,j,k) - 4.
&0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2))
enddo
else if (k .eq. 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * ((-(4.0d0)) * u(m,i,j,k
&- 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m,i,j,k + 2
&))
enddo
else if (k .ge. 3 .and. k .le. problem_size - 4) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
&* u(m,i,j,k - 1) + 6.0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1) + u(m
&,i,j,k + 2))
enddo
else if (k .eq. problem_size - 3) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.0d0
&* u(m,i,j,k - 1) + 6.0d0 * u(m,i,j,k) - 4.0d0 * u(m,i,j,k + 1))
enddo
else if (k .eq. problem_size - 2) then
do m = 1,5
rhs_(m) = rhs_(m) - dssp * (u(m,i,j,k - 2) - 4.d0 *
& u(m,i,j,k - 1) + 5.d0 * u(m,i,j,k))
enddo
endif
rhs(1,i,j,k) = rhs_(1) * dt
rhs(2,i,j,k) = rhs_(2) * dt
rhs(3,i,j,k) = rhs_(3) * dt
rhs(4,i,j,k) = rhs_(4) * dt
rhs(5,i,j,k) = rhs_(5) * dt
enddo
enddo
enddo
!DVM$ end region
return
end

View File

@@ -0,0 +1,307 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine exact_rhs
c---------------------------------------------------------------------
c---------------------------------------------------------------------
include 'header3d.h'
double precision dtemp(5), xi, eta, zeta, dtpp
integer m, i, j, k, ip1, im1, jp1, p, p1,
> jm1, km1, kp1,z
double precision ue_(-2:2, 5), buf_(-2:2, 5),cuf_(-2:2),q_(-2:2)
!DVM$ region
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(m)
do k= 0, problem_size-1
do j = 0, problem_size-1
do i = 0, problem_size-1
do m = 1, 5
forcing(m,i,j,k) = 0.0d0
end do
end do
end do
end do
c---------------------------------------------------------------------
c xi-direction flux differences
c---------------------------------------------------------------------
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(zeta,eta,xi,m,dtemp
!DVM$& ,buf_,cuf_,q_,dtpp,z,ue_)
do k = 1, problem_size-2
do j = 1, problem_size-2
do i = 1, problem_size-2
zeta = dble(k) * dnzm1
eta = dble(j) * dnym1
do z = -2, 2
xi = dble(i + z) * dnxm1
do m = 1, 5
dtemp(m) = ce(m,1) +
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
do m = 1, 5
ue_(z,m) = dtemp(m)
end do
dtpp = 1.0d0 / dtemp(1)
do m = 2, 5
buf_(z, m) = dtpp * dtemp(m)
end do
cuf_(z) = buf_(z,2) * buf_(z,2)
buf_(z,1) = cuf_(z) + buf_(z,3) * buf_(z,3) +
> buf_(z,4) * buf_(z,4)
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)*
> ue_(z,3) + buf_(z,4)*ue_(z,4))
enddo
forcing(1,i,j,k) = forcing(1,i,j,k) -
> tx2*( ue_(1,2)-ue_(-1,2) )+
> dx1tx1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
forcing(2,i,j,k) = forcing(2,i,j,k) - tx2 * (
> (ue_(1,2)*buf_(1,2)+c2*(ue_(1,5)-q_(1)))-
> (ue_(-1,2)*buf_(-1,2)+c2*(ue_(-1,5)-q_(-1))))+
> xxcon1*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
> dx2tx1*( ue_(1,2)-2.0d0* ue_(0,2)+ue_(-1,2))
forcing(3,i,j,k) = forcing(3,i,j,k) - tx2 * (
> ue_(1,3)*buf_(1,2)-ue_(-1,3)*buf_(-1,2))+
> xxcon2*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
> dx3tx1*( ue_(1,3)-2.0d0*ue_(0,3) +ue_(-1,3))
forcing(4,i,j,k) = forcing(4,i,j,k) - tx2*(
> ue_(1,4)*buf_(1,2)-ue_(-1,4)*buf_(-1,2))+
> xxcon2*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
> dx4tx1*( ue_(1,4)-2.0d0* ue_(0,4)+ ue_(-1,4))
forcing(5,i,j,k) = forcing(5,i,j,k) - tx2*(
> buf_(1,2)*(c1*ue_(1,5)-c2*q_(1))-
> buf_(-1,2)*(c1*ue_(-1,5)-c2*q_(-1)))+
> 0.5d0*xxcon3*(buf_(1,1)-2.0d0*buf_(0,1)+
> buf_(-1,1))+
> xxcon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
> xxcon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
> dx5tx1*( ue_(1,5)-2.0d0* ue_(0,5)+ ue_(-1,5))
do m = 1, 5
if(i .eq. 1) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
else if(i .eq. 2) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
> 4.0d0*ue_(1,m) + ue_(2,m))
else if(i .eq. problem_size-3) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
else if(i .eq. problem_size-2) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
else
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp*
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
endif
end do
end do
end do
end do
c---------------------------------------------------------------------
c eta-direction flux differences
c---------------------------------------------------------------------
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(zeta,eta,xi,m,dtemp
!DVM$& ,buf_,cuf_,q_,dtpp,z,ue_)
do k = 1, problem_size- 2
do j = 1, problem_size-2
do i = 1, problem_size- 2
zeta = dble(k) * dnzm1
xi = dble(i) * dnxm1
do z = -2, 2
eta = dble(j + z) * dnym1
do m = 1, 5
dtemp(m) = ce(m,1) +
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
do m = 1, 5
ue_(z,m) = dtemp(m)
end do
dtpp = 1.0d0 / dtemp(1)
do m = 2, 5
buf_(z, m) = dtpp * dtemp(m)
end do
cuf_(z) = buf_(z,3) * buf_(z,3)
buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) +
> buf_(z,4) * buf_(z,4)
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)
> *ue_(z,3) + buf_(z,4) * ue_(z,4))
enddo
forcing(1,i,j,k) = forcing(1,i,j,k) -
> ty2*( ue_(1,3)-ue_(-1,3) )+
> dy1ty1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
forcing(2,i,j,k) = forcing(2,i,j,k) - ty2*(
> ue_(1,2)*buf_(1,3)-ue_(-1,2)*buf_(-1,3))+
> yycon2*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
> dy2ty1*( ue_(1,2)-2.0* ue_(0,2)+ ue_(-1,2))
forcing(3,i,j,k) = forcing(3,i,j,k) - ty2*(
> (ue_(1,3)*buf_(1,3)+c2*(ue_(1,5)-q_(1)))-
> (ue_(-1,3)*buf_(-1,3)+c2*(ue_(-1,5)-q_(-1))))+
> yycon1*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
> dy3ty1*( ue_(1,3)-2.0d0*ue_(0,3) +ue_(-1,3))
forcing(4,i,j,k) = forcing(4,i,j,k) - ty2*(
> ue_(1,4)*buf_(1,3)-ue_(-1,4)*buf_(-1,3))+
> yycon2*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
> dy4ty1*( ue_(1,4)-2.0d0*ue_(0,4)+ ue_(-1,4))
forcing(5,i,j,k) = forcing(5,i,j,k) - ty2*(
> buf_(1,3)*(c1*ue_(1,5)-c2*q_(1))-
> buf_(-1,3)*(c1*ue_(-1,5)-c2*q_(-1)))+
> 0.5d0*yycon3*(buf_(1,1)-2.0d0*buf_(0,1)+
> buf_(-1,1))+
> yycon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
> yycon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
> dy5ty1*(ue_(1,5)-2.0d0*ue_(0,5)+ue_(-1,5))
do m = 1, 5
if(j .eq. 1) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
else if(j .eq. 2) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
> 4.0d0*ue_(1,m) + ue_(2,m))
else if(j .eq. problem_size-3) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
else if(j .eq. problem_size-2) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
else
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp*
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
endif
end do
end do
end do
end do
c---------------------------------------------------------------------
c zeta-direction flux differences
c---------------------------------------------------------------------
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(zeta,eta,xi,m
!DVM$& ,buf_,cuf_,q_,ue_,dtpp,dtemp,z)
do k = 1, problem_size-2
do j = 1, problem_size-2
do i = 1, problem_size-2
xi = dble(i) * dnxm1
eta = dble(j) * dnym1
do z = -2, 2
zeta = dble(k + z) * dnzm1
do m = 1, 5
dtemp(m) = ce(m,1) +
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
do m = 1, 5
ue_(z,m) = dtemp(m)
end do
dtpp = 1.0d0 / dtemp(1)
do m = 2, 5
buf_(z, m) = dtpp * dtemp(m)
end do
cuf_(z) = buf_(z,4) * buf_(z,4)
buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) +
> buf_(z,3) * buf_(z,3)
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)*
> ue_(z,3) + buf_(z,4)*ue_(z,4))
enddo
forcing(1,i,j,k) = forcing(1,i,j,k) -
> tz2*( ue_(1,4)-ue_(-1,4) )+
> dz1tz1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
forcing(2,i,j,k) = forcing(2,i,j,k) - tz2 * (
> ue_(1,2)*buf_(1,4)-ue_(-1,2)*buf_(-1,4))+
> zzcon2*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
> dz2tz1*( ue_(1,2)-2.0d0* ue_(0,2)+ ue_(-1,2))
forcing(3,i,j,k) = forcing(3,i,j,k) - tz2 * (
> ue_(1,3)*buf_(1,4)-ue_(-1,3)*buf_(-1,4))+
> zzcon2*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
> dz3tz1*(ue_(1,3)-2.0d0*ue_(0,3)+ue_(-1,3))
forcing(4,i,j,k) = forcing(4,i,j,k) - tz2 * (
> (ue_(1,4)*buf_(1,4)+c2*(ue_(1,5)-q_(1)))-
> (ue_(-1,4)*buf_(-1,4)+c2*(ue_(-1,5)-q_(-1))))+
> zzcon1*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
> dz4tz1*( ue_(1,4)-2.0d0*ue_(0,4) +ue_(-1,4))
forcing(5,i,j,k) = forcing(5,i,j,k) - tz2 * (
> buf_(1,4)*(c1*ue_(1,5)-c2*q_(1))-
> buf_(-1,4)*(c1*ue_(-1,5)-c2*q_(-1)))+
> 0.5d0*zzcon3*(buf_(1,1)-2.0d0*buf_(0,1)
> +buf_(-1,1))+
> zzcon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
> zzcon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
> dz5tz1*( ue_(1,5)-2.0d0*ue_(0,5)+ ue_(-1,5))
do m = 1, 5
if(k .eq. 1) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
else if(k .eq. 2) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
> 4.0d0*ue_(1,m) + ue_(2,m))
else if(k .eq. problem_size-3) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
else if(k .eq. problem_size-2) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
else
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp*
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
endif
end do
end do
end do
end do
c---------------------------------------------------------------------
c now change the sign of the forcing function,
c---------------------------------------------------------------------
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(m)
do k = 1, problem_size-2
do j = 1, problem_size-2
do i = 1, problem_size-2
do m = 1, 5
forcing(m,i,j,k) = -1.d0 * forcing(m,i,j,k)
end do
end do
end do
end do
!DVM$ end region
return
end

View File

@@ -0,0 +1,4 @@
subroutine exact_rhs
return
end

View File

@@ -0,0 +1,18 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! this function returns the exact solution at point xi, eta, zeta
!---------------------------------------------------------------------
subroutine exact_solution (xi, eta, zeta, dtemp)
include 'header3d.h'
double precision xi,eta,zeta,dtemp(5)
integer m
do m = 1,5
dtemp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + xi * (ce(m
&,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6) + eta * (
&ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * (ce(m,7) +
&zeta * (ce(m,10) + zeta * ce(m,13))))
enddo
return
end

View File

@@ -0,0 +1,106 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!
! header.h
!
!---------------------------------------------------------------------
!---------------------------------------------------------------------
implicit none
!---------------------------------------------------------------------
! The following include file is generated automatically by the
! "setparams" utility. It defines
! maxcells: the square root of the maximum number of processors
! problem_size: 12, 64, 102, 162 (for class T, A, B, C)
! dt_default: default time step for this problem size if no
! config file
! niter_default: default number of iterations for this problem size
!---------------------------------------------------------------------
include 'npbparams.h'
integer aa, bb, cc, BLOCK_SIZE
parameter (aa=1, bb=2, cc=3, BLOCK_SIZE=5)
integer grid_points(3)
double precision elapsed_time
common /global/ elapsed_time, grid_points
double precision tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3
double precision dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4
double precision dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt
double precision ce(5,13), dxmax, dymax, dzmax, xxcon1, xxcon2
double precision xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1
double precision dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4
double precision yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1
double precision zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1
double precision dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1
double precision dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2
double precision c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1
double precision dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1
double precision c2dtty1, c2dttz1, comz1, comz4, comz5, comz6
double precision c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16
integer stage_n, BL, R
common /constants/ tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3
common /constants/ dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4
common /constants/ dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt
common /constants/ ce, dxmax, dymax, dzmax, xxcon1, xxcon2
common /constants/ xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1
common /constants/ dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4
common /constants/ yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1
common /constants/ zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1
common /constants/ dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1
common /constants/ dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2
common /constants/ c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1
common /constants/ dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1
common /constants/ c2dtty1, c2dttz1, comz1, comz4, comz5, comz6
common /constants/ c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16
common /constants/ stage_n
integer IMAX, JMAX, KMAX
parameter (IMAX=problem_size,JMAX=problem_size,KMAX=problem_size)
parameter (BL=1, R=0)
!
! to improve cache performance, grid dimensions padded by 1
! for even number sizes only.
!
double precision us(0:IMAX/2*2, 0:JMAX/2*2, 0:KMAX/2*2)
double precision vs(0:IMAX/2*2, 0:JMAX/2*2, 0:KMAX/2*2)
double precision ws(0:IMAX/2*2, 0:JMAX/2*2, 0:KMAX/2*2)
double precision qs(0:IMAX/2*2, 0:JMAX/2*2, 0:KMAX/2*2)
double precision rho_i(0:IMAX/2*2, 0:JMAX/2*2, 0:KMAX/2*2)
double precision square(0:IMAX/2*2, 0:JMAX/2*2, 0:KMAX/2*2)
double precision forcing (5,0:IMAX/2*2,0:JMAX/2*2, 0:KMAX/2*2)
double precision u(5,0:(IMAX+1)/2*2,0:(JMAX+1)/2*2,0:(KMAX+1)/2*2)
double precision rhs(5,0:IMAX/2*2, 0:JMAX/2*2, 0:KMAX/2*2)
double precision lhs__(5,5,0:IMAX/2*2,0:JMAX/2*2,0:KMAX/2*2/BL+R)
common /fields/ u, us, vs, ws, qs, rho_i, square
common /fields/ rhs, forcing, lhs__
double precision cv(-2:problem_size+1)
double precision cuf(-2:problem_size+1), q(-2:problem_size+1)
double precision ue(-2:problem_size+1,5), buf(-2:problem_size+1,5)
common /work_1d/ cv, cuf, q, ue, buf
double precision tmp1, tmp2, tmp3, tmp11, tmp22
double precision t1, t2, t3, tm1, tm2, tm3
common /work_lhs/ tmp1, tmp2, tmp3, tmp11, tmp22
common /work_lhs/ t1, t2, t3, tm1, tm2, tm3
double precision tmp_block(5,5), b_inverse(5,5), tmp_vec(5)
common /work_solve/ tmp_block, b_inverse, tmp_vec
!--------------------------------------------------------------------
! FDVM Specifications
!--------------------------------------------------------------------
!DVM$ DISTRIBUTE us (BLOCK,BLOCK,BLOCK)
!DVM$ ALIGN (i,j,k) WITH us(i,j,k) :: vs, ws, qs, rho_i, square
!DVM$ ALIGN (*,*,i,j,k) WITH us(i,j,k) :: lhs__
!DVM$ ALIGN (*,i,j,k) WITH us(i,j,k) :: u, rhs
!DVM$ ALIGN (*,i,j,k) WITH us(i,j,k) :: forcing
!DVM$ SHADOW u(2:2,2:2,2:2,2:2)

View File

@@ -0,0 +1,181 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! This subroutine initializes the field variable u using
! tri-linear transfinite interpolation of the boundary values
!---------------------------------------------------------------------
subroutine initialize ()
include 'header3d.h'
integer i,j,k,m,ix,iy,iz
double precision xi,eta,zeta,pface(5,3,2),pxi,peta,pzeta,temp(5),
&xi1,yi1,zi1
xi = 0.0
eta = 0.0
zeta = 0.0
!---------------------------------------------------------------------
! Later (in compute_rhs) we compute 1/u for every element. A few of
! the corner elements are not used, but it convenient (and faster)
! to compute the whole thing with a simple loop. Make sure those
! values are nonzero by initializing the whole thing here.
!---------------------------------------------------------------------
!DVM$ region out(u)
!DVM$ PARALLEL (k,j,i) ON u(*,i,j,k), SHADOW_COMPUTE, private(m)
do k = 0,imax - 1
do j = 0,imax - 1
do i = 0,imax - 1
do m = 1,5
u(m,i,j,k) = 1.0
enddo
enddo
enddo
enddo
!DVM$ PARALLEL (k,j,i) ON u(*,i,j,k), private(m,zeta,eta, xi,ix,
!DVM$& iy,iz,Pxi,Peta,Pzeta,Pface,xi1,yi1,zi1,temp),SHADOW_COMPUTE
do k = 0,grid_points(3) - 1
do j = 0,grid_points(2) - 1
do i = 0,grid_points(1) - 1
zeta = dble (k) * dnzm1
eta = dble (j) * dnym1
xi = dble (i) * dnxm1
do ix = 1,2
! call exact_solution(dble(ix-1), eta, zeta, Pface(1,1,ix))
xi1 = dble (ix - 1)
do m = 1,5
pface(m,1,ix) = ce(m,1) + xi1 * (ce(m,2) + xi1 * (c
&e(m,5) + xi1 * (ce(m,8) + xi1 * ce(m,11)))) + eta * (ce(m,3) + eta
& * (ce(m,6) + eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4)
& + zeta * (ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
enddo
enddo
do iy = 1,2
! call exact_solution(xi, dble(iy-1) , zeta, Pface(1,2,iy))
yi1 = dble (iy - 1)
do m = 1,5
pface(m,2,iy) = ce(m,1) + xi * (ce(m,2) + xi * (ce(
&m,5) + xi * (ce(m,8) + xi * ce(m,11)))) + yi1 * (ce(m,3) + yi1 * (
&ce(m,6) + yi1 * (ce(m,9) + yi1 * ce(m,12)))) + zeta * (ce(m,4) + z
&eta * (ce(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
enddo
enddo
do iz = 1,2
! call exact_solution(xi, eta, dble(iz-1), Pface(1,3,iz))
zi1 = dble (iz - 1)
do m = 1,5
pface(m,3,iz) = ce(m,1) + xi * (ce(m,2) + xi * (ce(
&m,5) + xi * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (
&ce(m,6) + eta * (ce(m,9) + eta * ce(m,12)))) + zi1 * (ce(m,4) + zi
&1 * (ce(m,7) + zi1 * (ce(m,10) + zi1 * ce(m,13))))
enddo
enddo
do m = 1,5
pxi = xi * pface(m,1,2) + (1.0d0 - xi) * pface(m,1,1)
peta = eta * pface(m,2,2) + (1.0d0 - eta) * pface(m,2,
&1)
pzeta = zeta * pface(m,3,2) + (1.0d0 - zeta) * pface(m
&,3,1)
u(m,i,j,k) = pxi + peta + pzeta - pxi * peta - pxi * p
&zeta - peta * pzeta + pxi * peta * pzeta
enddo
if(i .eq. 0) then
do m = 1,5
temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + xi
& * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6) +
& eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * (ce
&(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
enddo
do m = 1,5
u(m,i,j,k) = temp(m)
enddo
endif
if(i .eq. grid_points(1) - 1) then
xi = 1.0d0
do m = 1,5
temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + xi
& * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6) +
& eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * (ce
&(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
enddo
do m = 1,5
u(m,i,j,k) = temp(m)
enddo
endif
if(j .eq. 0) then
zeta = dble (k) * dnzm1
xi = dble (i) * dnxm1
eta = 0.0d0
do m = 1,5
temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + xi
& * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6) +
& eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * (ce
&(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
enddo
do m = 1,5
u(m,i,j,k) = temp(m)
enddo
endif
if(j .eq. grid_points(2) - 1) then
zeta = dble (k) * dnzm1
xi = dble (i) * dnxm1
eta = 1.0d0
! call exact_solution(xi, eta, zeta, temp)
do m = 1,5
temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + xi
& * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6) +
& eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * (ce
&(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
enddo
do m = 1,5
u(m,i,j,k) = temp(m)
enddo
endif
if(k .eq. 0) then
zeta = 0.0d0
xi = dble (i) * dnxm1
eta = dble (j) * dnym1
! call exact_solution(xi, eta, zeta, temp)
do m = 1,5
temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + xi
& * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6) +
& eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * (ce
&(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
enddo
do m = 1,5
u(m,i,j,k) = temp(m)
enddo
endif
if(k .eq. grid_points(3) - 1) then
zeta = 1.0d0
xi = dble (i) * dnxm1
eta = dble (j) * dnym1
! call exact_solution(xi, eta, zeta, temp)
do m = 1,5
temp(m) = ce(m,1) + xi * (ce(m,2) + xi * (ce(m,5) + xi
& * (ce(m,8) + xi * ce(m,11)))) + eta * (ce(m,3) + eta * (ce(m,6) +
& eta * (ce(m,9) + eta * ce(m,12)))) + zeta * (ce(m,4) + zeta * (ce
&(m,7) + zeta * (ce(m,10) + zeta * ce(m,13))))
enddo
do m = 1,5
u(m,i,j,k) = temp(m)
enddo
endif
enddo
enddo
enddo
!DVM$ end region
return
end

View File

@@ -0,0 +1,58 @@
subroutine print_results (name, class, n1, n2, n3, niter, t, mops,
& optype, verified, npbversion)
! , compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7)
implicit none
character*2 name
character*1 class
integer n1,n2,n3,niter,j
double precision t,mops
character optype*24,size*13
logical verified
character*(*) npbversion
! , compiletime,cs1, cs2, cs3, cs4, cs5, cs6, cs7
write (unit = *,fmt = 2) name
2 format(//, ' ', A2, ' Benchmark Completed.')
write (unit = *,fmt = 3) class
3 format(' Class = ', 12x, a12)
! If this is not a grid-based problem (EP, FT, CG), then
! we only print n1, which contains some measure of the
! problem size. In that case, n2 and n3 are both zero.
! Otherwise, we print the grid size n1xn2xn3
if (n2 .eq. 0 .and. n3 .eq. 0) then
if (name(1:2) .eq. 'EP') then
write (unit = size,fmt = '(f12.0)') 2.d0** n1
do j = 13,1,(-(1))
if (size(j:j) .eq. '.') size(j:j) = ' '
enddo
write (unit = *,fmt = 42) size
42 format(' Size = ',12x, a14)
else
write (unit = *,fmt = 44) n1
44 format(' Size = ',12x, i12)
endif
else
write (unit = *,fmt = 4) n1,n2,n3
4 format(' Size = ',12x, i3,'x',i3,'x',i3)
endif
write (unit = *,fmt = 5) niter
5 format(' Iterations = ', 12x, i12)
write (unit = *,fmt = 6) t
6 format(' Time in seconds = ',12x, f12.2)
write (unit = *,fmt = 9) mops
9 format(' Mop/s total = ',12x, f12.2)
write (unit = *,fmt = 11) optype
11 format(' Operation type = ', a24)
if (verified) then
write (unit = *,fmt = 12) ' SUCCESSFUL'
else
write (unit = *,fmt = 12) 'UNSUCCESSFUL'
endif
12 format(' Verification = ', 12x, a)
write (unit = *,fmt = 13) npbversion
13 format(' Version = ', 12x, a12)
return
end

View File

@@ -0,0 +1,165 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine set_constants ()
include 'header3d.h'
ce(1,1) = 2.0d0
ce(1,2) = 0.0d0
ce(1,3) = 0.0d0
ce(1,4) = 4.0d0
ce(1,5) = 5.0d0
ce(1,6) = 3.0d0
ce(1,7) = 0.5d0
ce(1,8) = 0.02d0
ce(1,9) = 0.01d0
ce(1,10) = 0.03d0
ce(1,11) = 0.5d0
ce(1,12) = 0.4d0
ce(1,13) = 0.3d0
ce(2,1) = 1.0d0
ce(2,2) = 0.0d0
ce(2,3) = 0.0d0
ce(2,4) = 0.0d0
ce(2,5) = 1.0d0
ce(2,6) = 2.0d0
ce(2,7) = 3.0d0
ce(2,8) = 0.01d0
ce(2,9) = 0.03d0
ce(2,10) = 0.02d0
ce(2,11) = 0.4d0
ce(2,12) = 0.3d0
ce(2,13) = 0.5d0
ce(3,1) = 2.0d0
ce(3,2) = 2.0d0
ce(3,3) = 0.0d0
ce(3,4) = 0.0d0
ce(3,5) = 0.0d0
ce(3,6) = 2.0d0
ce(3,7) = 3.0d0
ce(3,8) = 0.04d0
ce(3,9) = 0.03d0
ce(3,10) = 0.05d0
ce(3,11) = 0.3d0
ce(3,12) = 0.5d0
ce(3,13) = 0.4d0
ce(4,1) = 2.0d0
ce(4,2) = 2.0d0
ce(4,3) = 0.0d0
ce(4,4) = 0.0d0
ce(4,5) = 0.0d0
ce(4,6) = 2.0d0
ce(4,7) = 3.0d0
ce(4,8) = 0.03d0
ce(4,9) = 0.05d0
ce(4,10) = 0.04d0
ce(4,11) = 0.2d0
ce(4,12) = 0.1d0
ce(4,13) = 0.3d0
ce(5,1) = 5.0d0
ce(5,2) = 4.0d0
ce(5,3) = 3.0d0
ce(5,4) = 2.0d0
ce(5,5) = 0.1d0
ce(5,6) = 0.4d0
ce(5,7) = 0.3d0
ce(5,8) = 0.05d0
ce(5,9) = 0.04d0
ce(5,10) = 0.03d0
ce(5,11) = 0.1d0
ce(5,12) = 0.3d0
ce(5,13) = 0.2d0
c1 = 1.4d0
c2 = 0.4d0
c3 = 0.1d0
c4 = 1.0d0
c5 = 1.4d0
dnxm1 = 1.0d0 / dble (grid_points(1) - 1)
dnym1 = 1.0d0 / dble (grid_points(2) - 1)
dnzm1 = 1.0d0 / dble (grid_points(3) - 1)
c1c2 = c1 * c2
c1c5 = c1 * c5
c3c4 = c3 * c4
c1345 = c1c5 * c3c4
conz1 = 1.0d0 - c1c5
tx1 = 1.0d0 / (dnxm1 * dnxm1)
tx2 = 1.0d0 / (2.0d0 * dnxm1)
tx3 = 1.0d0 / dnxm1
ty1 = 1.0d0 / (dnym1 * dnym1)
ty2 = 1.0d0 / (2.0d0 * dnym1)
ty3 = 1.0d0 / dnym1
tz1 = 1.0d0 / (dnzm1 * dnzm1)
tz2 = 1.0d0 / (2.0d0 * dnzm1)
tz3 = 1.0d0 / dnzm1
dx1 = 0.75d0
dx2 = 0.75d0
dx3 = 0.75d0
dx4 = 0.75d0
dx5 = 0.75d0
dy1 = 0.75d0
dy2 = 0.75d0
dy3 = 0.75d0
dy4 = 0.75d0
dy5 = 0.75d0
dz1 = 1.0d0
dz2 = 1.0d0
dz3 = 1.0d0
dz4 = 1.0d0
dz5 = 1.0d0
dxmax = dmax1 (dx3,dx4)
dymax = dmax1 (dy2,dy4)
dzmax = dmax1 (dz2,dz3)
dssp = 0.25d0 * dmax1 (dx1,dmax1 (dy1,dz1))
c4dssp = 4.0d0 * dssp
c5dssp = 5.0d0 * dssp
dttx1 = dt * tx1
dttx2 = dt * tx2
dtty1 = dt * ty1
dtty2 = dt * ty2
dttz1 = dt * tz1
dttz2 = dt * tz2
c2dttx1 = 2.0d0 * dttx1
c2dtty1 = 2.0d0 * dtty1
c2dttz1 = 2.0d0 * dttz1
dtdssp = dt * dssp
comz1 = dtdssp
comz4 = 4.0d0 * dtdssp
comz5 = 5.0d0 * dtdssp
comz6 = 6.0d0 * dtdssp
c3c4tx3 = c3c4 * tx3
c3c4ty3 = c3c4 * ty3
c3c4tz3 = c3c4 * tz3
dx1tx1 = dx1 * tx1
dx2tx1 = dx2 * tx1
dx3tx1 = dx3 * tx1
dx4tx1 = dx4 * tx1
dx5tx1 = dx5 * tx1
dy1ty1 = dy1 * ty1
dy2ty1 = dy2 * ty1
dy3ty1 = dy3 * ty1
dy4ty1 = dy4 * ty1
dy5ty1 = dy5 * ty1
dz1tz1 = dz1 * tz1
dz2tz1 = dz2 * tz1
dz3tz1 = dz3 * tz1
dz4tz1 = dz4 * tz1
dz5tz1 = dz5 * tz1
c2iv = 2.5d0
con43 = 4.0d0 / 3.0d0
con16 = 1.0d0 / 6.0d0
xxcon1 = c3c4tx3 * con43 * tx3
xxcon2 = c3c4tx3 * tx3
xxcon3 = c3c4tx3 * conz1 * tx3
xxcon4 = c3c4tx3 * con16 * tx3
xxcon5 = c3c4tx3 * c1c5 * tx3
yycon1 = c3c4ty3 * con43 * ty3
yycon2 = c3c4ty3 * ty3
yycon3 = c3c4ty3 * conz1 * ty3
yycon4 = c3c4ty3 * con16 * ty3
yycon5 = c3c4ty3 * c1c5 * ty3
zzcon1 = c3c4tz3 * con43 * tz3
zzcon2 = c3c4tz3 * tz3
zzcon3 = c3c4tz3 * conz1 * tz3
zzcon4 = c3c4tz3 * con16 * tz3
zzcon5 = c3c4tz3 * c1c5 * tz3
return
end

View File

@@ -0,0 +1,84 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine timer_clear (n)
!---------------------------------------------------------------------
!---------------------------------------------------------------------
implicit none
integer n
double precision start(64),elapsed(64)
common /tt/start,elapsed
elapsed(n) = 0.0
return
end
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine timer_start (n)
!---------------------------------------------------------------------
!---------------------------------------------------------------------
implicit none
external elapsed_time
double precision elapsed_time
integer n
double precision start(64),elapsed(64)
common /tt/start,elapsed
start(n) = elapsed_time ()
return
end
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine timer_stop (n)
!---------------------------------------------------------------------
!---------------------------------------------------------------------
implicit none
external elapsed_time
double precision elapsed_time
integer n
double precision start(64),elapsed(64)
common /tt/start,elapsed
double precision t,now
now = elapsed_time ()
t = now - start(n)
elapsed(n) = elapsed(n) + t
return
end
!---------------------------------------------------------------------
!---------------------------------------------------------------------
double precision function timer_read (n)
!---------------------------------------------------------------------
!---------------------------------------------------------------------
implicit none
integer n
double precision start(64),elapsed(64)
common /tt/start,elapsed
timer_read = elapsed(n)
return
end
!---------------------------------------------------------------------
!---------------------------------------------------------------------
double precision function elapsed_time ()
!---------------------------------------------------------------------
!---------------------------------------------------------------------
implicit none
double precision t,dvtime
integer dvm_debug
! dvm_debug = 0 - standard mode, dvm_debug > 0 - debugging mode
parameter (dvm_debug = 0)
data t/0.d0/
t = dvtime ()
elapsed_time = t
return
end

View File

@@ -0,0 +1,312 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! verification routine
!---------------------------------------------------------------------
subroutine verify (no_time_steps, class, verified)
include 'header3d.h'
double precision xcrref(5),xceref(5),xcrdif(5),xcedif(5),epsilon,
&xce(5),xcr(5),dtref
integer m,no_time_steps
character class
logical verified
!---------------------------------------------------------------------
! tolerance level
!---------------------------------------------------------------------
epsilon = 1.0d-08
!---------------------------------------------------------------------
! compute the error norm and the residual norm, and exit if not printing
!---------------------------------------------------------------------
call error_norm(xce)
call compute_rhs()
call rhs_norm(xcr)
do m = 1,5
xcr(m) = xcr(m) / dt
enddo
class = 'U'
verified = .TRUE.
do m = 1,5
xcrref(m) = 1.0
xceref(m) = 1.0
enddo
!---------------------------------------------------------------------
! reference data for 12X12X12 grids after 100 time steps, with DT = 1.0d-02
!---------------------------------------------------------------------
if (problem_size .eq. 12 .and. problem_size .eq. 12 .and. problem_
&size .eq. 12 .and. no_time_steps .eq. 60) then
class = 'S'
dtref = 1.0d-2
!---------------------------------------------------------------------
! Reference values of RMS-norms of residual.
!---------------------------------------------------------------------
xcrref(1) = 1.7034283709541311d-01
xcrref(2) = 1.2975252070034097d-02
xcrref(3) = 3.2527926989486055d-02
xcrref(4) = 2.6436421275166801d-02
xcrref(5) = 1.9211784131744430d-01
!---------------------------------------------------------------------
! Reference values of RMS-norms of solution error.
!---------------------------------------------------------------------
xceref(1) = 4.9976913345811579d-04
xceref(2) = 4.5195666782961927d-05
xceref(3) = 7.3973765172921357d-05
xceref(4) = 7.3821238632439731d-05
xceref(5) = 8.9269630987491446d-04
!---------------------------------------------------------------------
! reference data for 24X24X24 grids after 200 time steps, with DT = 0.8d-3
!---------------------------------------------------------------------
else if (problem_size .eq. 24 .and. problem_size .eq. 24 .and. pro
&blem_size .eq. 24 .and. no_time_steps .eq. 200) then
class = 'W'
dtref = 0.8d-3
!---------------------------------------------------------------------
! Reference values of RMS-norms of residual.
!---------------------------------------------------------------------
xcrref(1) = 0.1125590409344d+03
xcrref(2) = 0.1180007595731d+02
xcrref(3) = 0.2710329767846d+02
xcrref(4) = 0.2469174937669d+02
xcrref(5) = 0.2638427874317d+03
!---------------------------------------------------------------------
! Reference values of RMS-norms of solution error.
!---------------------------------------------------------------------
xceref(1) = 0.4419655736008d+01
xceref(2) = 0.4638531260002d+00
xceref(3) = 0.1011551749967d+01
xceref(4) = 0.9235878729944d+00
xceref(5) = 0.1018045837718d+02
!---------------------------------------------------------------------
! reference data for 64X64X64 grids after 200 time steps, with DT = 0.8d-3
!---------------------------------------------------------------------
else if (problem_size .eq. 64 .and. problem_size .eq. 64 .and. pro
&blem_size .eq. 64 .and. no_time_steps .eq. 200) then
class = 'A'
dtref = 0.8d-3
!---------------------------------------------------------------------
! Reference values of RMS-norms of residual.
!---------------------------------------------------------------------
xcrref(1) = 1.0806346714637264d+02
xcrref(2) = 1.1319730901220813d+01
xcrref(3) = 2.5974354511582465d+01
xcrref(4) = 2.3665622544678910d+01
xcrref(5) = 2.5278963211748344d+02
!---------------------------------------------------------------------
! Reference values of RMS-norms of solution error.
!---------------------------------------------------------------------
xceref(1) = 4.2348416040525025d+00
xceref(2) = 4.4390282496995698d-01
xceref(3) = 9.6692480136345650d-01
xceref(4) = 8.8302063039765474d-01
xceref(5) = 9.7379901770829278d+00
!---------------------------------------------------------------------
! reference data for 102X102X102 grids after 200 time steps,
! with DT = 3.0d-04
!---------------------------------------------------------------------
else if (problem_size .eq. 102 .and. problem_size .eq. 102 .and. p
&roblem_size .eq. 102 .and. no_time_steps .eq. 200) then
class = 'B'
dtref = 3.0d-4
!---------------------------------------------------------------------
! Reference values of RMS-norms of residual.
!---------------------------------------------------------------------
xcrref(1) = 1.4233597229287254d+03
xcrref(2) = 9.9330522590150238d+01
xcrref(3) = 3.5646025644535285d+02
xcrref(4) = 3.2485447959084092d+02
xcrref(5) = 3.2707541254659363d+03
!---------------------------------------------------------------------
! Reference values of RMS-norms of solution error.
!---------------------------------------------------------------------
xceref(1) = 5.2969847140936856d+01
xceref(2) = 4.4632896115670668d+00
xceref(3) = 1.3122573342210174d+01
xceref(4) = 1.2006925323559144d+01
xceref(5) = 1.2459576151035986d+02
!---------------------------------------------------------------------
! reference data for 162X162X162 grids after 200 time steps,
! with DT = 1.0d-04
!---------------------------------------------------------------------
else if (problem_size .eq. 162 .and. problem_size .eq. 162 .and. p
&roblem_size .eq. 162 .and. no_time_steps .eq. 200) then
class = 'C'
dtref = 1.0d-4
!---------------------------------------------------------------------
! Reference values of RMS-norms of residual.
!---------------------------------------------------------------------
xcrref(1) = 0.62398116551764615d+04
xcrref(2) = 0.50793239190423964d+03
xcrref(3) = 0.15423530093013596d+04
xcrref(4) = 0.13302387929291190d+04
xcrref(5) = 0.11604087428436455d+05
!---------------------------------------------------------------------
! Reference values of RMS-norms of solution error.
!---------------------------------------------------------------------
xceref(1) = 0.16462008369091265d+03
xceref(2) = 0.11497107903824313d+02
xceref(3) = 0.41207446207461508d+02
xceref(4) = 0.37087651059694167d+02
xceref(5) = 0.36211053051841265d+03
!---------------------------------------------------------------------
! reference data for 408x408x408 grids after 250 time steps, with DT = 0.2d-04
!---------------------------------------------------------------------
elseif ( (grid_points(1) .eq. 408) .and.
& (grid_points(2) .eq. 408) .and.
& (grid_points(3) .eq. 408) .and.
& (no_time_steps . eq. 250) ) then
class = 'D'
dtref = 0.2d-4
!---------------------------------------------------------------------
! Reference values of RMS-norms of residual.
!---------------------------------------------------------------------
xcrref(1) = 0.2533188551738d+05
xcrref(2) = 0.2346393716980d+04
xcrref(3) = 0.6294554366904d+04
xcrref(4) = 0.5352565376030d+04
xcrref(5) = 0.3905864038618d+05
!---------------------------------------------------------------------
! Reference values of RMS-norms of solution error.
!---------------------------------------------------------------------
xceref(1) = 0.3100009377557d+03
xceref(2) = 0.2424086324913d+02
xceref(3) = 0.7782212022645d+02
xceref(4) = 0.6835623860116d+02
xceref(5) = 0.6065737200368d+03
!---------------------------------------------------------------------
! reference data for 1020x1020x1020 grids after 250 time steps, with DT = 0.4d-05
!---------------------------------------------------------------------
elseif ( (grid_points(1) .eq. 1020) .and.
& (grid_points(2) .eq. 1020) .and.
& (grid_points(3) .eq. 1020) .and.
& (no_time_steps . eq. 250) ) then
class = 'E'
dtref = 0.4d-5
!---------------------------------------------------------------------
! Reference values of RMS-norms of residual.
!---------------------------------------------------------------------
xcrref(1) = 0.9795372484517d+05
xcrref(2) = 0.9739814511521d+04
xcrref(3) = 0.2467606342965d+05
xcrref(4) = 0.2092419572860d+05
xcrref(5) = 0.1392138856939d+06
!---------------------------------------------------------------------
! Reference values of RMS-norms of solution error.
!---------------------------------------------------------------------
xceref(1) = 0.4327562208414d+03
xceref(2) = 0.3699051964887d+02
xceref(3) = 0.1089845040954d+03
xceref(4) = 0.9462517622043d+02
xceref(5) = 0.7765512765309d+03
else
verified = .FALSE.
endif
!---------------------------------------------------------------------
! verification test for residuals if gridsize is either 12X12X12 or
! 64X64X64 or 102X102X102 or 162X162X162
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! Compute the difference of solution values and the known reference values.
!---------------------------------------------------------------------
do m = 1,5
xcrdif(m) = dabs ((xcr(m) - xcrref(m)) / xcrref(m))
xcedif(m) = dabs ((xce(m) - xceref(m)) / xceref(m))
enddo
!---------------------------------------------------------------------
! Output the comparison of computed results to known cases.
!---------------------------------------------------------------------
if (class .ne. 'U') then
write (unit = *,fmt = 1990) class
1990 format(' Verification being performed for class ', a)
write (unit = *,fmt = 2000) epsilon
2000 format(' accuracy setting for epsilon = ', E20.13)
if (dabs (dt - dtref) .gt. epsilon) then
verified = .FALSE.
class = 'U'
write (unit = *,fmt = 1000) dtref
1000 format(' DT does not match the reference value of ',
& E15.8)
endif
else
write (unit = *,fmt = 1995)
1995 format(' Unknown class')
endif
if (class .ne. 'U') then
write (unit = *,fmt = 2001)
else
write (unit = *,fmt = 2005)
endif
2001 format(' Comparison of RMS-norms of residual')
2005 format(' RMS-norms of residual')
do m = 1,5
if (class .eq. 'U') then
write (unit = *,fmt = 2015) m,xcr(m)
else if (xcrdif(m) .gt. epsilon .or. isnan(xcrdif(m)))then
verified = .FALSE.
write (unit = *,fmt = 2010) m,xcr(m),xcrref(m),xcrdif(m)
else
write (unit = *,fmt = 2011) m,xcr(m),xcrref(m),xcrdif(m)
endif
enddo
if (class .ne. 'U') then
write (unit = *,fmt = 2002)
else
write (unit = *,fmt = 2006)
endif
2002 format(' Comparison of RMS-norms of solution error')
2006 format(' RMS-norms of solution error')
do m = 1,5
if (class .eq. 'U') then
write (unit = *,fmt = 2015) m,xce(m)
else if (xcedif(m) .gt. epsilon .or. isnan(xcedif(m))) then
verified = .FALSE.
write (unit = *,fmt = 2010) m,xce(m),xceref(m),xcedif(m)
else
write (unit = *,fmt = 2011) m,xce(m),xceref(m),xcedif(m)
endif
enddo
2010 format(' FAILURE: ', i2, E20.13, E20.13, E20.13)
2011 format(' ', i2, E20.13, E20.13, E20.13)
2015 format(' ', i2, E20.13)
if (class .eq. 'U') then
write (unit = *,fmt = 2022)
write (unit = *,fmt = 2023)
2022 format(' No reference values provided')
2023 format(' No verification performed')
else if (verified) then
write (unit = *,fmt = 2020)
2020 format(' Verification Successful')
else
write (unit = *,fmt = 2021)
2021 format(' Verification failed')
endif
return
end

View File

@@ -0,0 +1,627 @@
!---------------------------------------------------------------------
! performs guaussian elimination on this cell.
!
! assumes that unpacking routines for non-first cells
! preload C' and rhs' from previous cell.
!
! assumed send happens outside this routine, but that
! c'(IMAX) and rhs'(IMAX) will be sent to next cell
!---------------------------------------------------------------------
subroutine x_solve ()
include 'header3d.h'
double precision pivot,coeff
integer i__0,j__1
integer m,n
double precision coeff__2
double precision pivot__3
double precision lhs_(5,5,3),u_(0:3,5)
double precision rhs_(5)
integer i,j,k,isize
isize = problem_size - 1
!---------------------------------------------------------------------
! outer most do loops - sweeping in i direction
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! begin inner most do loop
! do all the elements of the cell unless last
!---------------------------------------------------------------------
!, ACROSS(rhs(1:0,0:0,0:0,0:0),lhs__(1:0,0:0,0:0,0:0,0:0))
!DVM$ region local(lhs__)
!DVM$ PARALLEL (k,j) ON rhs(*,*,j,k),private(u_,i,rhs_,tmp1,tmp2,
!DVM$&tmp3,t1,t2,t3,tm1,tm2,pivot,coeff, tm3,i__0,j__1,tmp11,
!DVM$&tmp22,lhs_,m,n,coeff__2, pivot__3)
do k = 1,problem_size - 2
do j = 1,problem_size - 2
do m = 1,5
u_(0,m) = u(m,0,j,k)
u_(1,m) = u(m,1,j,k)
enddo
do i = 1,isize - 1
do m = 1,5
u_(2,m) = u(m,i + 1,j,k)
enddo
! if(i .ne. isize) then
tmp1 = 1.0d+00 / u_(1,1)
tmp2 = tmp1 * tmp1
tmp3 = tmp1 * tmp2
t1 = 1.0d+00 / u_(0,1)
t2 = t1 * t1
t3 = t1 * t2
tm1 = 1.0d+00 / u_(2,1)
tm2 = tm1 * tm1
tm3 = tm1 * tm2
tmp11 = dt * tx1
tmp22 = dt * tx2
lhs_(1,1,1) = (-(tmp11)) * dx1
lhs_(1,2,1) = (-(tmp22))
lhs_(1,3,1) = 0.
lhs_(1,4,1) = 0.
lhs_(1,5,1) = 0.
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * t2 * u_(0,2))) +
& c2 * 0.50d+00 * (u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
&* u_(0,4)) * t2) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,2))
lhs_(2,2,1) = (-(tmp22)) * ((2.0d+00 - c2) * (u_(0,2) / u
&_(0,1))) - tmp11 * con43 * c3c4 * t1 - tmp11 * dx2
lhs_(2,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * t1))
lhs_(2,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,4) * t1))
lhs_(2,5,1) = (-(tmp22)) * c2
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,3))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,3))
lhs_(3,2,1) = (-(tmp22)) * u_(0,3) * t1
lhs_(3,3,1) = (-(tmp22)) * u_(0,2) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dx3
lhs_(3,4,1) = 0.
lhs_(3,5,1) = 0.
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,4))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,4))
lhs_(4,2,1) = (-(tmp22)) * u_(0,4) * t1
lhs_(4,3,1) = 0.
lhs_(4,4,1) = (-(tmp22)) * u_(0,2) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dx4
lhs_(4,5,1) = 0.
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * (u_(0,5) * t1)) *
& (u_(0,2) * t1)) - tmp11 * ((-(con43 * c3c4 - c1345)) * t3 * u_(0,
&2)** 2 - (c3c4 - c1345) * t3 * u_(0,3)** 2 - (c3c4 - c1345) * t3 *
& u_(0,4)** 2 - c1345 * t2 * u_(0,5))
lhs_(5,2,1) = (-(tmp22)) * (c1 * u_(0,5) * t1 - 0.50d+00
&* c2 * (3.0d+00 * u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
&* u_(0,4)) * t2) - tmp11 * ((con43 * c3c4 - c1345) * t2 * u_(0,2))
lhs_(5,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,2))
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,3)
lhs_(5,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,4) * u_(0,2))
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,4)
lhs_(5,5,1) = (-(tmp22)) * c1 * (u_(0,2) * t1) - tmp11 *
&c1345 * t1 - tmp11 * dx5
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dx1
lhs_(1,2,2) = 0.
lhs_(1,3,2) = 0.
lhs_(1,4,2) = 0.
lhs_(1,5,2) = 0.
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
& * u_(1,2))
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3c4 *
&tmp1 + tmp11 * 2.0d+00 * dx2
lhs_(2,3,2) = 0.
lhs_(2,4,2) = 0.
lhs_(2,5,2) = 0.
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&3))
lhs_(3,2,2) = 0.
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dx3
lhs_(3,4,2) = 0.
lhs_(3,5,2) = 0.
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&4))
lhs_(4,2,2) = 0.
lhs_(4,3,2) = 0.
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dx4
lhs_(4,5,2) = tmp11 * 2.0d+00 * 0
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(con43 * c3c4 - c1345)
&) * tmp3 * u_(1,2)** 2 - (c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (c3
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
lhs_(5,2,2) = tmp11 * 2.0d+00 * ((con43 * c3c4 - c1345) *
& tmp2 * u_(1,2))
lhs_(5,3,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,3)
lhs_(5,4,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,4)
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
&tmp11 * 2.0d+00 * dx5
if (i .ne. 1) then
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
&,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
&,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
&,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
&,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
&,3)
enddo
endif
lhs_(1,1,3) = (-(tmp11)) * dx1
lhs_(1,2,3) = tmp22
lhs_(1,3,3) = 0.
lhs_(1,4,3) = 0.
lhs_(1,5,3) = 0.
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * tm2 * u_(2,2))) + c2
&* 0.50d+00 * (u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u_
&(2,4)) * tm2) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,2))
lhs_(2,2,3) = tmp22 * ((2.0d+00 - c2) * (u_(2,2) / u_(2,1
&))) - tmp11 * con43 * c3c4 * tm1 - tmp11 * dx2
lhs_(2,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * tm1))
lhs_(2,4,3) = tmp22 * ((-(c2)) * (u_(2,4) * tm1))
lhs_(2,5,3) = tmp22 * c2
lhs_(3,1,3) = tmp22 * ((-(u_(2,2) * u_(2,3))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,3))
lhs_(3,2,3) = tmp22 * u_(2,3) * tm1
lhs_(3,3,3) = tmp22 * u_(2,2) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dx3
lhs_(3,4,3) = 0.
lhs_(3,5,3) = 0.
lhs_(4,1,3) = tmp22 * ((-(u_(2,2) * u_(2,4))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,4))
lhs_(4,2,3) = tmp22 * u_(2,4) * tm1
lhs_(4,3,3) = 0.
lhs_(4,4,3) = tmp22 * u_(2,2) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dx4
lhs_(4,5,3) = 0.
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * (u_(2,5) * tm1)) * (u
&_(2,2) * tm1)) - tmp11 * ((-(con43 * c3c4 - c1345)) * tm3 * u_(2,2
&)** 2 - (c3c4 - c1345) * tm3 * u_(2,3)** 2 - (c3c4 - c1345) * tm3
&* u_(2,4)** 2 - c1345 * tm2 * u_(2,5))
lhs_(5,2,3) = tmp22 * (c1 * u_(2,5) * tm1 - 0.50d+00 * c2
& * (3.0d+00 * u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u_
&(2,4)) * tm2) - tmp11 * ((con43 * c3c4 - c1345) * tm2 * u_(2,2))
lhs_(5,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,2)) * tm
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,3)
lhs_(5,4,3) = tmp22 * ((-(c2)) * (u_(2,4) * u_(2,2)) * tm
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,4)
lhs_(5,5,3) = tmp22 * c1 * (u_(2,2) * tm1) - tmp11 * c134
&5 * tm1 - tmp11 * dx5
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i - 1
&,j,k) - lhs_(i__0,2,1) * rhs(2,i - 1,j,k) - lhs_(i__0,3,1) * rhs(3
&,i - 1,j,k) - lhs_(i__0,4,1) * rhs(4,i - 1,j,k) - lhs_(i__0,5,1) *
& rhs(5,i - 1,j,k)
enddo
pivot = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot
lhs_(1,3,2) = lhs_(1,3,2) * pivot
lhs_(1,4,2) = lhs_(1,4,2) * pivot
lhs_(1,5,2) = lhs_(1,5,2) * pivot
lhs_(1,1,3) = lhs_(1,1,3) * pivot
lhs_(1,2,3) = lhs_(1,2,3) * pivot
lhs_(1,3,3) = lhs_(1,3,3) * pivot
lhs_(1,4,3) = lhs_(1,4,3) * pivot
lhs_(1,5,3) = lhs_(1,5,3) * pivot
rhs_(1) = rhs_(1) * pivot
coeff = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(1)
coeff = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(1)
coeff = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(1)
coeff = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(1)
pivot = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot
lhs_(2,4,2) = lhs_(2,4,2) * pivot
lhs_(2,5,2) = lhs_(2,5,2) * pivot
lhs_(2,1,3) = lhs_(2,1,3) * pivot
lhs_(2,2,3) = lhs_(2,2,3) * pivot
lhs_(2,3,3) = lhs_(2,3,3) * pivot
lhs_(2,4,3) = lhs_(2,4,3) * pivot
lhs_(2,5,3) = lhs_(2,5,3) * pivot
rhs_(2) = rhs_(2) * pivot
coeff = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(2)
coeff = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(2)
coeff = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(2)
coeff = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(2)
pivot = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot
lhs_(3,5,2) = lhs_(3,5,2) * pivot
lhs_(3,1,3) = lhs_(3,1,3) * pivot
lhs_(3,2,3) = lhs_(3,2,3) * pivot
lhs_(3,3,3) = lhs_(3,3,3) * pivot
lhs_(3,4,3) = lhs_(3,4,3) * pivot
lhs_(3,5,3) = lhs_(3,5,3) * pivot
rhs_(3) = rhs_(3) * pivot
coeff = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(3)
coeff = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(3)
coeff = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(3)
coeff = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(3)
pivot = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot
lhs_(4,1,3) = lhs_(4,1,3) * pivot
lhs_(4,2,3) = lhs_(4,2,3) * pivot
lhs_(4,3,3) = lhs_(4,3,3) * pivot
lhs_(4,4,3) = lhs_(4,4,3) * pivot
lhs_(4,5,3) = lhs_(4,5,3) * pivot
rhs_(4) = rhs_(4) * pivot
coeff = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(4)
coeff = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(4)
coeff = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(4)
coeff = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(4)
pivot = 1.00d0 / lhs_(5,5,2)
lhs_(5,1,3) = lhs_(5,1,3) * pivot
lhs_(5,2,3) = lhs_(5,2,3) * pivot
lhs_(5,3,3) = lhs_(5,3,3) * pivot
lhs_(5,4,3) = lhs_(5,4,3) * pivot
lhs_(5,5,3) = lhs_(5,5,3) * pivot
rhs_(5) = rhs_(5) * pivot
coeff = lhs_(1,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(5)
coeff = lhs_(2,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(5)
coeff = lhs_(3,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(5)
coeff = lhs_(4,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(5)
do i__0 = 1,5
lhs__(i__0,1,i,j,k) = lhs_(i__0,1,3)
lhs__(i__0,2,i,j,k) = lhs_(i__0,2,3)
lhs__(i__0,3,i,j,k) = lhs_(i__0,3,3)
lhs__(i__0,4,i,j,k) = lhs_(i__0,4,3)
lhs__(i__0,5,i,j,k) = lhs_(i__0,5,3)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
u_(0,m) = u_(1,m)
u_(1,m) = u_(2,m)
enddo
enddo
! else ! ******************* else case *************************
do n = 1,5
lhs_(1,n,1) = 0.0d0
lhs_(1,n,2) = 0.0d0
lhs_(1,n,3) = 0.0d0
lhs_(2,n,1) = 0.0d0
lhs_(2,n,2) = 0.0d0
lhs_(2,n,3) = 0.0d0
lhs_(3,n,1) = 0.0d0
lhs_(3,n,2) = 0.0d0
lhs_(3,n,3) = 0.0d0
lhs_(4,n,1) = 0.0d0
lhs_(4,n,2) = 0.0d0
lhs_(4,n,3) = 0.0d0
lhs_(5,n,1) = 0.0d0
lhs_(5,n,2) = 0.0d0
lhs_(5,n,3) = 0.0d0
enddo
do m = 1,5
lhs_(m,m,2) = 1.0d0
enddo
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i - 1,j,
&k) - lhs_(i__0,2,1) * rhs(2,i - 1,j,k) - lhs_(i__0,3,1) * rhs(3,i
&- 1,j,k) - lhs_(i__0,4,1) * rhs(4,i - 1,j,k) - lhs_(i__0,5,1) * rh
&s(5,i - 1,j,k)
enddo
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
enddo
!---------------------------------------------------------------------
!
!---------------------------------------------------------------------
pivot__3 = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
rhs_(1) = rhs_(1) * pivot__3
coeff__2 = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
coeff__2 = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
coeff__2 = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
coeff__2 = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
pivot__3 = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
rhs_(2) = rhs_(2) * pivot__3
coeff__2 = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
coeff__2 = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
coeff__2 = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
coeff__2 = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
pivot__3 = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
rhs_(3) = rhs_(3) * pivot__3
coeff__2 = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
coeff__2 = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
coeff__2 = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
coeff__2 = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
pivot__3 = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
rhs_(4) = rhs_(4) * pivot__3
coeff__2 = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
coeff__2 = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
coeff__2 = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
coeff__2 = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
pivot__3 = 1.00d0 / lhs_(5,5,2)
rhs_(5) = rhs_(5) * pivot__3
coeff__2 = lhs_(1,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
coeff__2 = lhs_(2,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
coeff__2 = lhs_(3,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
coeff__2 = lhs_(4,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
! endif
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
! enddo
do i = problem_size - 2,0,(-(1))
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do m = 1,5
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k) * rhs(1,i + 1,j,k)
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k) * rhs(2,i + 1,j,k)
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k) * rhs(3,i + 1,j,k)
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k) * rhs(4,i + 1,j,k)
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k) * rhs(5,i + 1,j,k)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
enddo
enddo
enddo
!DVM$ end region
return
end

View File

@@ -0,0 +1,640 @@
!---------------------------------------------------------------------
! performs guaussian elimination on this cell.
!
! assumes that unpacking routines for non-first cells
! preload C' and rhs' from previous cell.
!
! assumed send happens outside this routine, but that
! c'(IMAX) and rhs'(IMAX) will be sent to next cell
!---------------------------------------------------------------------
subroutine x_solve ()
include 'header3d.h'
double precision pivot,coeff
integer i__0,j__1
integer m,n
double precision coeff__2
double precision pivot__3
double precision lhs_(5,5,3),u_(0:3,5)
double precision rhs_(5)
integer i,j,k,isize,low_k,high_k,k1,maxBlK
isize = problem_size - 1
if(mod((problem_size - 2), BL) .eq. 0) then
maxBlK = (problem_size - 2) / BL
else
maxblK = (problem_size - 2) / BL + 1
endif
!---------------------------------------------------------------------
! outer most do loops - sweeping in i direction
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! begin inner most do loop
! do all the elements of the cell unless last
!---------------------------------------------------------------------
!, ACROSS(rhs(1:0,0:0,0:0,0:0),lhs__(1:0,0:0,0:0,0:0,0:0))
!DVM$ region local(lhs__)
!DVM$ PARALLEL (k1, j) ON rhs(*,*,j,k1),private(u_,i,rhs_,tmp1,tmp2,
!DVM$&tmp3,t1,t2,t3,tm1,tm2,pivot,coeff, tm3,i__0,j__1,tmp11,
!DVM$&tmp22,lhs_,m,n,coeff__2, pivot__3,low_k,high_k,k),
!DVM$&cuda_block(32)
do k1 = 1, maxblK
do j = 1, problem_size - 2
low_k = (k1 - 1) * BL + 1
high_k = k1 * BL
if(high_k .gt. problem_size - 2) then
high_k = problem_size - 2
endif
do k = low_k, high_k
do m = 1,5
u_(0,m) = u(m,0,j,k)
u_(1,m) = u(m,1,j,k)
enddo
do i = 1,isize - 1
do m = 1,5
u_(2,m) = u(m,i + 1,j,k)
enddo
! if(i .ne. isize) then
tmp1 = 1.0d+00 / u_(1,1)
tmp2 = tmp1 * tmp1
tmp3 = tmp1 * tmp2
t1 = 1.0d+00 / u_(0,1)
t2 = t1 * t1
t3 = t1 * t2
tm1 = 1.0d+00 / u_(2,1)
tm2 = tm1 * tm1
tm3 = tm1 * tm2
tmp11 = dt * tx1
tmp22 = dt * tx2
lhs_(1,1,1) = (-(tmp11)) * dx1
lhs_(1,2,1) = (-(tmp22))
lhs_(1,3,1) = 0.
lhs_(1,4,1) = 0.
lhs_(1,5,1) = 0.
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * t2 * u_(0,2))) +
& c2 * 0.50d+00 * (u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
&* u_(0,4)) * t2) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,2))
lhs_(2,2,1) = (-(tmp22)) * ((2.0d+00 - c2) * (u_(0,2) / u
&_(0,1))) - tmp11 * con43 * c3c4 * t1 - tmp11 * dx2
lhs_(2,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * t1))
lhs_(2,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,4) * t1))
lhs_(2,5,1) = (-(tmp22)) * c2
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,3))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,3))
lhs_(3,2,1) = (-(tmp22)) * u_(0,3) * t1
lhs_(3,3,1) = (-(tmp22)) * u_(0,2) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dx3
lhs_(3,4,1) = 0.
lhs_(3,5,1) = 0.
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,4))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,4))
lhs_(4,2,1) = (-(tmp22)) * u_(0,4) * t1
lhs_(4,3,1) = 0.
lhs_(4,4,1) = (-(tmp22)) * u_(0,2) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dx4
lhs_(4,5,1) = 0.
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * (u_(0,5) * t1)) *
& (u_(0,2) * t1)) - tmp11 * ((-(con43 * c3c4 - c1345)) * t3 * u_(0,
&2)** 2 - (c3c4 - c1345) * t3 * u_(0,3)** 2 - (c3c4 - c1345) * t3 *
& u_(0,4)** 2 - c1345 * t2 * u_(0,5))
lhs_(5,2,1) = (-(tmp22)) * (c1 * u_(0,5) * t1 - 0.50d+00
&* c2 * (3.0d+00 * u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
&* u_(0,4)) * t2) - tmp11 * ((con43 * c3c4 - c1345) * t2 * u_(0,2))
lhs_(5,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,2))
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,3)
lhs_(5,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,4) * u_(0,2))
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,4)
lhs_(5,5,1) = (-(tmp22)) * c1 * (u_(0,2) * t1) - tmp11 *
&c1345 * t1 - tmp11 * dx5
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dx1
lhs_(1,2,2) = 0.
lhs_(1,3,2) = 0.
lhs_(1,4,2) = 0.
lhs_(1,5,2) = 0.
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
& * u_(1,2))
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3c4 *
&tmp1 + tmp11 * 2.0d+00 * dx2
lhs_(2,3,2) = 0.
lhs_(2,4,2) = 0.
lhs_(2,5,2) = 0.
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&3))
lhs_(3,2,2) = 0.
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dx3
lhs_(3,4,2) = 0.
lhs_(3,5,2) = 0.
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&4))
lhs_(4,2,2) = 0.
lhs_(4,3,2) = 0.
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dx4
lhs_(4,5,2) = tmp11 * 2.0d+00 * 0
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(con43 * c3c4 - c1345)
&) * tmp3 * u_(1,2)** 2 - (c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (c3
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
lhs_(5,2,2) = tmp11 * 2.0d+00 * ((con43 * c3c4 - c1345) *
& tmp2 * u_(1,2))
lhs_(5,3,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,3)
lhs_(5,4,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,4)
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
&tmp11 * 2.0d+00 * dx5
if (i .ne. 1) then
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
&,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
&,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
&,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
&,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
&,3)
enddo
endif
lhs_(1,1,3) = (-(tmp11)) * dx1
lhs_(1,2,3) = tmp22
lhs_(1,3,3) = 0.
lhs_(1,4,3) = 0.
lhs_(1,5,3) = 0.
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * tm2 * u_(2,2))) + c2
&* 0.50d+00 * (u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u_
&(2,4)) * tm2) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,2))
lhs_(2,2,3) = tmp22 * ((2.0d+00 - c2) * (u_(2,2) / u_(2,1
&))) - tmp11 * con43 * c3c4 * tm1 - tmp11 * dx2
lhs_(2,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * tm1))
lhs_(2,4,3) = tmp22 * ((-(c2)) * (u_(2,4) * tm1))
lhs_(2,5,3) = tmp22 * c2
lhs_(3,1,3) = tmp22 * ((-(u_(2,2) * u_(2,3))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,3))
lhs_(3,2,3) = tmp22 * u_(2,3) * tm1
lhs_(3,3,3) = tmp22 * u_(2,2) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dx3
lhs_(3,4,3) = 0.
lhs_(3,5,3) = 0.
lhs_(4,1,3) = tmp22 * ((-(u_(2,2) * u_(2,4))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,4))
lhs_(4,2,3) = tmp22 * u_(2,4) * tm1
lhs_(4,3,3) = 0.
lhs_(4,4,3) = tmp22 * u_(2,2) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dx4
lhs_(4,5,3) = 0.
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * (u_(2,5) * tm1)) * (u
&_(2,2) * tm1)) - tmp11 * ((-(con43 * c3c4 - c1345)) * tm3 * u_(2,2
&)** 2 - (c3c4 - c1345) * tm3 * u_(2,3)** 2 - (c3c4 - c1345) * tm3
&* u_(2,4)** 2 - c1345 * tm2 * u_(2,5))
lhs_(5,2,3) = tmp22 * (c1 * u_(2,5) * tm1 - 0.50d+00 * c2
& * (3.0d+00 * u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u_
&(2,4)) * tm2) - tmp11 * ((con43 * c3c4 - c1345) * tm2 * u_(2,2))
lhs_(5,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,2)) * tm
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,3)
lhs_(5,4,3) = tmp22 * ((-(c2)) * (u_(2,4) * u_(2,2)) * tm
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,4)
lhs_(5,5,3) = tmp22 * c1 * (u_(2,2) * tm1) - tmp11 * c134
&5 * tm1 - tmp11 * dx5
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i - 1
&,j,k) - lhs_(i__0,2,1) * rhs(2,i - 1,j,k) - lhs_(i__0,3,1) * rhs(3
&,i - 1,j,k) - lhs_(i__0,4,1) * rhs(4,i - 1,j,k) - lhs_(i__0,5,1) *
& rhs(5,i - 1,j,k)
enddo
pivot = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot
lhs_(1,3,2) = lhs_(1,3,2) * pivot
lhs_(1,4,2) = lhs_(1,4,2) * pivot
lhs_(1,5,2) = lhs_(1,5,2) * pivot
lhs_(1,1,3) = lhs_(1,1,3) * pivot
lhs_(1,2,3) = lhs_(1,2,3) * pivot
lhs_(1,3,3) = lhs_(1,3,3) * pivot
lhs_(1,4,3) = lhs_(1,4,3) * pivot
lhs_(1,5,3) = lhs_(1,5,3) * pivot
rhs_(1) = rhs_(1) * pivot
coeff = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(1)
coeff = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(1)
coeff = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(1)
coeff = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(1)
pivot = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot
lhs_(2,4,2) = lhs_(2,4,2) * pivot
lhs_(2,5,2) = lhs_(2,5,2) * pivot
lhs_(2,1,3) = lhs_(2,1,3) * pivot
lhs_(2,2,3) = lhs_(2,2,3) * pivot
lhs_(2,3,3) = lhs_(2,3,3) * pivot
lhs_(2,4,3) = lhs_(2,4,3) * pivot
lhs_(2,5,3) = lhs_(2,5,3) * pivot
rhs_(2) = rhs_(2) * pivot
coeff = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(2)
coeff = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(2)
coeff = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(2)
coeff = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(2)
pivot = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot
lhs_(3,5,2) = lhs_(3,5,2) * pivot
lhs_(3,1,3) = lhs_(3,1,3) * pivot
lhs_(3,2,3) = lhs_(3,2,3) * pivot
lhs_(3,3,3) = lhs_(3,3,3) * pivot
lhs_(3,4,3) = lhs_(3,4,3) * pivot
lhs_(3,5,3) = lhs_(3,5,3) * pivot
rhs_(3) = rhs_(3) * pivot
coeff = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(3)
coeff = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(3)
coeff = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(3)
coeff = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(3)
pivot = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot
lhs_(4,1,3) = lhs_(4,1,3) * pivot
lhs_(4,2,3) = lhs_(4,2,3) * pivot
lhs_(4,3,3) = lhs_(4,3,3) * pivot
lhs_(4,4,3) = lhs_(4,4,3) * pivot
lhs_(4,5,3) = lhs_(4,5,3) * pivot
rhs_(4) = rhs_(4) * pivot
coeff = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(4)
coeff = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(4)
coeff = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(4)
coeff = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(4)
pivot = 1.00d0 / lhs_(5,5,2)
lhs_(5,1,3) = lhs_(5,1,3) * pivot
lhs_(5,2,3) = lhs_(5,2,3) * pivot
lhs_(5,3,3) = lhs_(5,3,3) * pivot
lhs_(5,4,3) = lhs_(5,4,3) * pivot
lhs_(5,5,3) = lhs_(5,5,3) * pivot
rhs_(5) = rhs_(5) * pivot
coeff = lhs_(1,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(5)
coeff = lhs_(2,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(5)
coeff = lhs_(3,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(5)
coeff = lhs_(4,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(5)
do i__0 = 1,5
lhs__(i__0,1,i,j,k1) = lhs_(i__0,1,3)
lhs__(i__0,2,i,j,k1) = lhs_(i__0,2,3)
lhs__(i__0,3,i,j,k1) = lhs_(i__0,3,3)
lhs__(i__0,4,i,j,k1) = lhs_(i__0,4,3)
lhs__(i__0,5,i,j,k1) = lhs_(i__0,5,3)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
u_(0,m) = u_(1,m)
u_(1,m) = u_(2,m)
enddo
enddo
! else ! ******************* else case *************************
do n = 1,5
lhs_(1,n,1) = 0.0d0
lhs_(1,n,2) = 0.0d0
lhs_(1,n,3) = 0.0d0
lhs_(2,n,1) = 0.0d0
lhs_(2,n,2) = 0.0d0
lhs_(2,n,3) = 0.0d0
lhs_(3,n,1) = 0.0d0
lhs_(3,n,2) = 0.0d0
lhs_(3,n,3) = 0.0d0
lhs_(4,n,1) = 0.0d0
lhs_(4,n,2) = 0.0d0
lhs_(4,n,3) = 0.0d0
lhs_(5,n,1) = 0.0d0
lhs_(5,n,2) = 0.0d0
lhs_(5,n,3) = 0.0d0
enddo
do m = 1,5
lhs_(m,m,2) = 1.0d0
enddo
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i - 1,j,
&k) - lhs_(i__0,2,1) * rhs(2,i - 1,j,k) - lhs_(i__0,3,1) * rhs(3,i
&- 1,j,k) - lhs_(i__0,4,1) * rhs(4,i - 1,j,k) - lhs_(i__0,5,1) * rh
&s(5,i - 1,j,k)
enddo
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
enddo
!---------------------------------------------------------------------
!
!---------------------------------------------------------------------
pivot__3 = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
rhs_(1) = rhs_(1) * pivot__3
coeff__2 = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
coeff__2 = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
coeff__2 = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
coeff__2 = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
pivot__3 = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
rhs_(2) = rhs_(2) * pivot__3
coeff__2 = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
coeff__2 = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
coeff__2 = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
coeff__2 = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
pivot__3 = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
rhs_(3) = rhs_(3) * pivot__3
coeff__2 = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
coeff__2 = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
coeff__2 = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
coeff__2 = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
pivot__3 = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
rhs_(4) = rhs_(4) * pivot__3
coeff__2 = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
coeff__2 = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
coeff__2 = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
coeff__2 = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
pivot__3 = 1.00d0 / lhs_(5,5,2)
rhs_(5) = rhs_(5) * pivot__3
coeff__2 = lhs_(1,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
coeff__2 = lhs_(2,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
coeff__2 = lhs_(3,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
coeff__2 = lhs_(4,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
! endif
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
! enddo
do i = problem_size - 2,0,(-(1))
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do m = 1,5
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k1) * rhs(1,i + 1,j,k)
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k1) * rhs(2,i + 1,j,k)
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k1) * rhs(3,i + 1,j,k)
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k1) * rhs(4,i + 1,j,k)
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k1) * rhs(5,i + 1,j,k)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
enddo
enddo
enddo
enddo
!DVM$ end region
return
end

View File

@@ -0,0 +1,640 @@
!---------------------------------------------------------------------
! performs guaussian elimination on this cell.
!
! assumes that unpacking routines for non-first cells
! preload C' and rhs' from previous cell.
!
! assumed send happens outside this routine, but that
! c'(IMAX) and rhs'(IMAX) will be sent to next cell
!---------------------------------------------------------------------
subroutine x_solve ()
include 'header3d.h'
double precision pivot,coeff
integer i__0,j__1
integer m,n
double precision coeff__2
double precision pivot__3
double precision lhs_(5,5,3),u_(0:3,5)
double precision rhs_(5),rhsp_(5)
integer i,j,k,isize
isize = problem_size - 1
!---------------------------------------------------------------------
! outer most do loops - sweeping in i direction
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! begin inner most do loop
! do all the elements of the cell unless last
!---------------------------------------------------------------------
!DVM$ region local(lhs__)
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k),private(u_,rhs_,tmp1,tmp2,
!DVM$&tmp3,t1,t2,t3,tm1,tm2,pivot,coeff, tm3,i__0,j__1,tmp11,
!DVM$&tmp22,lhs_,m,n,coeff__2, pivot__3),stage(stage_n),
!DVM$& ACROSS(rhs(0:0,1:0,0:0,0:0),lhs__(0:0,0:0,1:0,0:0,0:0))
do k = 1,problem_size - 2
do j = 1,problem_size - 2
do i = 1,isize - 1
if(i .ne. isize) then
do m = 1,5
lhs_(m,1,3) = lhs__(m,1,i-1,j,k)
lhs_(m,2,3) = lhs__(m,2,i-1,j,k)
lhs_(m,3,3) = lhs__(m,3,i-1,j,k)
lhs_(m,4,3) = lhs__(m,4,i-1,j,k)
lhs_(m,5,3) = lhs__(m,5,i-1,j,k)
u_(0,m) = u(m,i-1,j,k)
u_(1,m) = u(m,i,j,k)
u_(2,m) = u(m,i+1,j,k)
enddo
tmp1 = 1.0d+00 / u_(1,1)
tmp2 = tmp1 * tmp1
tmp3 = tmp1 * tmp2
t1 = 1.0d+00 / u_(0,1)
t2 = t1 * t1
t3 = t1 * t2
tm1 = 1.0d+00 / u_(2,1)
tm2 = tm1 * tm1
tm3 = tm1 * tm2
tmp11 = dt * tx1
tmp22 = dt * tx2
lhs_(1,1,1) = (-(tmp11)) * dx1
lhs_(1,2,1) = (-(tmp22))
lhs_(1,3,1) = 0.
lhs_(1,4,1) = 0.
lhs_(1,5,1) = 0.
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * t2 * u_(0,2))) +
& c2 * 0.50d+00 * (u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
&* u_(0,4)) * t2) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,2))
lhs_(2,2,1) = (-(tmp22)) * ((2.0d+00 - c2) * (u_(0,2) / u
&_(0,1))) - tmp11 * con43 * c3c4 * t1 - tmp11 * dx2
lhs_(2,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * t1))
lhs_(2,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,4) * t1))
lhs_(2,5,1) = (-(tmp22)) * c2
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,3))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,3))
lhs_(3,2,1) = (-(tmp22)) * u_(0,3) * t1
lhs_(3,3,1) = (-(tmp22)) * u_(0,2) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dx3
lhs_(3,4,1) = 0.
lhs_(3,5,1) = 0.
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,4))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,4))
lhs_(4,2,1) = (-(tmp22)) * u_(0,4) * t1
lhs_(4,3,1) = 0.
lhs_(4,4,1) = (-(tmp22)) * u_(0,2) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dx4
lhs_(4,5,1) = 0.
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * (u_(0,5) * t1)) *
& (u_(0,2) * t1)) - tmp11 * ((-(con43 * c3c4 - c1345)) * t3 * u_(0,
&2)** 2 - (c3c4 - c1345) * t3 * u_(0,3)** 2 - (c3c4 - c1345) * t3 *
& u_(0,4)** 2 - c1345 * t2 * u_(0,5))
lhs_(5,2,1) = (-(tmp22)) * (c1 * u_(0,5) * t1 - 0.50d+00
&* c2 * (3.0d+00 * u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
&* u_(0,4)) * t2) - tmp11 * ((con43 * c3c4 - c1345) * t2 * u_(0,2))
lhs_(5,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,2))
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,3)
lhs_(5,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,4) * u_(0,2))
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,4)
lhs_(5,5,1) = (-(tmp22)) * c1 * (u_(0,2) * t1) - tmp11 *
&c1345 * t1 - tmp11 * dx5
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dx1
lhs_(1,2,2) = 0.
lhs_(1,3,2) = 0.
lhs_(1,4,2) = 0.
lhs_(1,5,2) = 0.
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
& * u_(1,2))
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3c4 *
&tmp1 + tmp11 * 2.0d+00 * dx2
lhs_(2,3,2) = 0.
lhs_(2,4,2) = 0.
lhs_(2,5,2) = 0.
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&3))
lhs_(3,2,2) = 0.
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dx3
lhs_(3,4,2) = 0.
lhs_(3,5,2) = 0.
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&4))
lhs_(4,2,2) = 0.
lhs_(4,3,2) = 0.
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dx4
lhs_(4,5,2) = tmp11 * 2.0d+00 * 0
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(con43 * c3c4 - c1345)
&) * tmp3 * u_(1,2)** 2 - (c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (c3
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
lhs_(5,2,2) = tmp11 * 2.0d+00 * ((con43 * c3c4 - c1345) *
& tmp2 * u_(1,2))
lhs_(5,3,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,3)
lhs_(5,4,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,4)
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
&tmp11 * 2.0d+00 * dx5
if (i .ne. 1) then
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
&,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
&,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
&,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
&,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
&,3)
enddo
endif
lhs_(1,1,3) = (-(tmp11)) * dx1
lhs_(1,2,3) = tmp22
lhs_(1,3,3) = 0.
lhs_(1,4,3) = 0.
lhs_(1,5,3) = 0.
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * tm2 * u_(2,2))) + c2
&* 0.50d+00 * (u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u_
&(2,4)) * tm2) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,2))
lhs_(2,2,3) = tmp22 * ((2.0d+00 - c2) * (u_(2,2) / u_(2,1
&))) - tmp11 * con43 * c3c4 * tm1 - tmp11 * dx2
lhs_(2,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * tm1))
lhs_(2,4,3) = tmp22 * ((-(c2)) * (u_(2,4) * tm1))
lhs_(2,5,3) = tmp22 * c2
lhs_(3,1,3) = tmp22 * ((-(u_(2,2) * u_(2,3))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,3))
lhs_(3,2,3) = tmp22 * u_(2,3) * tm1
lhs_(3,3,3) = tmp22 * u_(2,2) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dx3
lhs_(3,4,3) = 0.
lhs_(3,5,3) = 0.
lhs_(4,1,3) = tmp22 * ((-(u_(2,2) * u_(2,4))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,4))
lhs_(4,2,3) = tmp22 * u_(2,4) * tm1
lhs_(4,3,3) = 0.
lhs_(4,4,3) = tmp22 * u_(2,2) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dx4
lhs_(4,5,3) = 0.
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * (u_(2,5) * tm1)) * (u
&_(2,2) * tm1)) - tmp11 * ((-(con43 * c3c4 - c1345)) * tm3 * u_(2,2
&)** 2 - (c3c4 - c1345) * tm3 * u_(2,3)** 2 - (c3c4 - c1345) * tm3
&* u_(2,4)** 2 - c1345 * tm2 * u_(2,5))
lhs_(5,2,3) = tmp22 * (c1 * u_(2,5) * tm1 - 0.50d+00 * c2
& * (3.0d+00 * u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u_
&(2,4)) * tm2) - tmp11 * ((con43 * c3c4 - c1345) * tm2 * u_(2,2))
lhs_(5,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,2)) * tm
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,3)
lhs_(5,4,3) = tmp22 * ((-(c2)) * (u_(2,4) * u_(2,2)) * tm
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,4)
lhs_(5,5,3) = tmp22 * c1 * (u_(2,2) * tm1) - tmp11 * c134
&5 * tm1 - tmp11 * dx5
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i - 1
&,j,k) - lhs_(i__0,2,1) * rhs(2,i - 1,j,k) - lhs_(i__0,3,1) * rhs(3
&,i - 1,j,k) - lhs_(i__0,4,1) * rhs(4,i - 1,j,k) - lhs_(i__0,5,1) *
& rhs(5,i - 1,j,k)
enddo
pivot = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot
lhs_(1,3,2) = lhs_(1,3,2) * pivot
lhs_(1,4,2) = lhs_(1,4,2) * pivot
lhs_(1,5,2) = lhs_(1,5,2) * pivot
lhs_(1,1,3) = lhs_(1,1,3) * pivot
lhs_(1,2,3) = lhs_(1,2,3) * pivot
lhs_(1,3,3) = lhs_(1,3,3) * pivot
lhs_(1,4,3) = lhs_(1,4,3) * pivot
lhs_(1,5,3) = lhs_(1,5,3) * pivot
rhs_(1) = rhs_(1) * pivot
coeff = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(1)
coeff = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(1)
coeff = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(1)
coeff = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(1)
pivot = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot
lhs_(2,4,2) = lhs_(2,4,2) * pivot
lhs_(2,5,2) = lhs_(2,5,2) * pivot
lhs_(2,1,3) = lhs_(2,1,3) * pivot
lhs_(2,2,3) = lhs_(2,2,3) * pivot
lhs_(2,3,3) = lhs_(2,3,3) * pivot
lhs_(2,4,3) = lhs_(2,4,3) * pivot
lhs_(2,5,3) = lhs_(2,5,3) * pivot
rhs_(2) = rhs_(2) * pivot
coeff = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(2)
coeff = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(2)
coeff = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(2)
coeff = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(2)
pivot = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot
lhs_(3,5,2) = lhs_(3,5,2) * pivot
lhs_(3,1,3) = lhs_(3,1,3) * pivot
lhs_(3,2,3) = lhs_(3,2,3) * pivot
lhs_(3,3,3) = lhs_(3,3,3) * pivot
lhs_(3,4,3) = lhs_(3,4,3) * pivot
lhs_(3,5,3) = lhs_(3,5,3) * pivot
rhs_(3) = rhs_(3) * pivot
coeff = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(3)
coeff = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(3)
coeff = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(3)
coeff = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(3)
pivot = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot
lhs_(4,1,3) = lhs_(4,1,3) * pivot
lhs_(4,2,3) = lhs_(4,2,3) * pivot
lhs_(4,3,3) = lhs_(4,3,3) * pivot
lhs_(4,4,3) = lhs_(4,4,3) * pivot
lhs_(4,5,3) = lhs_(4,5,3) * pivot
rhs_(4) = rhs_(4) * pivot
coeff = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(4)
coeff = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(4)
coeff = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(4)
coeff = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(4)
pivot = 1.00d0 / lhs_(5,5,2)
lhs_(5,1,3) = lhs_(5,1,3) * pivot
lhs_(5,2,3) = lhs_(5,2,3) * pivot
lhs_(5,3,3) = lhs_(5,3,3) * pivot
lhs_(5,4,3) = lhs_(5,4,3) * pivot
lhs_(5,5,3) = lhs_(5,5,3) * pivot
rhs_(5) = rhs_(5) * pivot
coeff = lhs_(1,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(5)
coeff = lhs_(2,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(5)
coeff = lhs_(3,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(5)
coeff = lhs_(4,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(5)
do i__0 = 1,5
lhs__(i__0,1,i,j,k) = lhs_(i__0,1,3)
lhs__(i__0,2,i,j,k) = lhs_(i__0,2,3)
lhs__(i__0,3,i,j,k) = lhs_(i__0,3,3)
lhs__(i__0,4,i,j,k) = lhs_(i__0,4,3)
lhs__(i__0,5,i,j,k) = lhs_(i__0,5,3)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
! enddo
else ! ******************* else case *************************
do n = 1,5
lhs_(1,n,1) = 0.0d0
lhs_(1,n,2) = 0.0d0
lhs_(1,n,3) = 0.0d0
lhs_(2,n,1) = 0.0d0
lhs_(2,n,2) = 0.0d0
lhs_(2,n,3) = 0.0d0
lhs_(3,n,1) = 0.0d0
lhs_(3,n,2) = 0.0d0
lhs_(3,n,3) = 0.0d0
lhs_(4,n,1) = 0.0d0
lhs_(4,n,2) = 0.0d0
lhs_(4,n,3) = 0.0d0
lhs_(5,n,1) = 0.0d0
lhs_(5,n,2) = 0.0d0
lhs_(5,n,3) = 0.0d0
enddo
do m = 1,5
lhs_(m,m,2) = 1.0d0
enddo
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i - 1,j,
&k) - lhs_(i__0,2,1) * rhs(2,i - 1,j,k) - lhs_(i__0,3,1) * rhs(3,i
&- 1,j,k) - lhs_(i__0,4,1) * rhs(4,i - 1,j,k) - lhs_(i__0,5,1) * rh
&s(5,i - 1,j,k)
enddo
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
enddo
!---------------------------------------------------------------------
!
!---------------------------------------------------------------------
pivot__3 = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
rhs_(1) = rhs_(1) * pivot__3
coeff__2 = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
coeff__2 = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
coeff__2 = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
coeff__2 = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
pivot__3 = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
rhs_(2) = rhs_(2) * pivot__3
coeff__2 = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
coeff__2 = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
coeff__2 = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
coeff__2 = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
pivot__3 = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
rhs_(3) = rhs_(3) * pivot__3
coeff__2 = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
coeff__2 = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
coeff__2 = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
coeff__2 = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
pivot__3 = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
rhs_(4) = rhs_(4) * pivot__3
coeff__2 = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
coeff__2 = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
coeff__2 = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
coeff__2 = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
pivot__3 = 1.00d0 / lhs_(5,5,2)
rhs_(5) = rhs_(5) * pivot__3
coeff__2 = lhs_(1,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
coeff__2 = lhs_(2,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
coeff__2 = lhs_(3,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
coeff__2 = lhs_(4,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
endif
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
enddo
enddo
enddo
! enddo
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k),private(m,rhsp_,rhs_),
!DVM$& ACROSS(rhs(0:0,0:1,0:0,0:0)),stage(stage_n)
do k = 1,problem_size - 2
do j = 1,problem_size - 2
do i = problem_size - 2,0,(-(1))
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
rhsp_(m) = rhs(m,i+1,j,k)
enddo
do m = 1,5
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k) * rhsp_(1)
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k) * rhsp_(2)
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k) * rhsp_(3)
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k) * rhsp_(4)
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k) * rhsp_(5)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
enddo
enddo
enddo
!DVM$ end region
return
end

View File

@@ -0,0 +1,622 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! performs guaussian elimination on this cell.
!
! assumes that unpacking routines for non-first cells
! preload C' and rhs' from previous cell.
!
! assumed send happens outside this routine, but that
! c'(JMAX) and rhs'(JMAX) will be sent to next cell
!---------------------------------------------------------------------
subroutine y_solve ()
include 'header3d.h'
double precision coeff
double precision pivot
integer i__0
integer j__1,m,n
double precision coeff__2
double precision pivot__3
double precision lhs_(5,5,3),rhs_(5),u_(0:3,5)
integer i,j,k,jsize,jstart
jstart = 0
jsize = problem_size - 1
!ACROSS (rhs(0:0,1:0,0:0,0:0),lhs__(0:0,1:0,0:0,0:0,0:0))
!DVM$ region local(lhs__)
!DVM$ PARALLEL (k,i) ON rhs(*,i,*,k), private(u_,j,rhs_,pivot,
!DVM$&coeff,tmp1, tmp2, tmp3, t1, t2, t3, tm1, tm2, tm3, tmp11,
!DVM$&tmp22, i__0, j__1,lhs_,coeff__2,pivot__3,m,n)
do k = 1,problem_size - 2
do i = 1,problem_size - 2
do m = 1,5
u_(0,m) = u(m,i,0,k)
u_(1,m) = u(m,i,1,k)
enddo
do j = 1,jsize - 1
do m = 1,5
u_(2,m) = u(m,i,j + 1,k)
enddo
! if(j .ne. jsize) then
tmp1 = 1.0d+00 / u_(1,1)
tmp2 = tmp1 * tmp1
tmp3 = tmp1 * tmp2
t1 = 1.0d+00 / u_(0,1)
t2 = t1 * t1
t3 = t1 * t2
tm1 = 1.0d+00 / u_(2,1)
tm2 = tm1 * tm1
tm3 = tm1 * tm2
tmp11 = dt * ty1
tmp22 = dt * ty2
lhs_(1,1,1) = (-(tmp11)) * dy1
lhs_(1,2,1) = 0.
lhs_(1,3,1) = (-(tmp22))
lhs_(1,4,1) = 0.
lhs_(1,5,1) = 0.
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,3))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,2))
lhs_(2,2,1) = (-(tmp22)) * u_(0,3) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dy2
lhs_(2,3,1) = (-(tmp22)) * u_(0,2) * t1
lhs_(2,4,1) = 0.
lhs_(2,5,1) = 0.
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,3) * t2)) +
& 0.50d+00 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
& * u_(0,4)) * t2)) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,3))
lhs_(3,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * t1)
lhs_(3,3,1) = (-(tmp22)) * ((2.0d+00 - c2) * u_(0,3) * t1
&) - tmp11 * con43 * c3c4 * t1 - tmp11 * dy3
lhs_(3,4,1) = (-(tmp22)) * ((-(c2)) * u_(0,4) * t1)
lhs_(3,5,1) = (-(tmp22)) * c2
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,4))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,4))
lhs_(4,2,1) = 0.
lhs_(4,3,1) = (-(tmp22)) * u_(0,4) * t1
lhs_(4,4,1) = (-(tmp22)) * u_(0,3) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dy4
lhs_(4,5,1) = 0.
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * u_(0,5) * t1) * u
&_(0,3) * t1) - tmp11 * ((-(c3c4 - c1345)) * t3 * u_(0,2)** 2 - (co
&n43 * c3c4 - c1345) * t3 * u_(0,3)** 2 - (c3c4 - c1345) * t3 * u_(
&0,4)** 2 - c1345 * t2 * u_(0,5))
lhs_(5,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * u_(0,3) *
& t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,2)
lhs_(5,3,1) = (-(tmp22)) * (c1 * u_(0,5) * t1 - 0.50d+00
&* c2 * ((u_(0,2) * u_(0,2) + 3.0d+00 * u_(0,3) * u_(0,3) + u_(0,4)
& * u_(0,4)) * t2)) - tmp11 * (con43 * c3c4 - c1345) * t2 * u_(0,3)
lhs_(5,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,4))
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,4)
lhs_(5,5,1) = (-(tmp22)) * c1 * u_(0,3) * t1 - tmp11 * c1
&345 * t1 - tmp11 * dy5
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dy1
lhs_(1,2,2) = 0.
lhs_(1,3,2) = 0.
lhs_(1,4,2) = 0.
lhs_(1,5,2) = 0.
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&2))
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dy2
lhs_(2,3,2) = 0.
lhs_(2,4,2) = 0.
lhs_(2,5,2) = 0.
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
& * u_(1,3))
lhs_(3,2,2) = 0.
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3c4 *
&tmp1 + tmp11 * 2.0d+00 * dy3
lhs_(3,4,2) = 0.
lhs_(3,5,2) = 0.
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&4))
lhs_(4,2,2) = 0.
lhs_(4,3,2) = 0.
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dy4
lhs_(4,5,2) = 0.
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(c3c4 - c1345)) * tmp3
& * u_(1,2)** 2 - (con43 * c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (c3
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
lhs_(5,2,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,2)
lhs_(5,3,2) = tmp11 * 2.0d+00 * (con43 * c3c4 - c1345) *
&tmp2 * u_(1,3)
lhs_(5,4,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,4)
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
&tmp11 * 2.0d+00 * dy5
if (j .ne. 1) then
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
&,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
&,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
&,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
&,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
&,3)
enddo
endif
lhs_(1,1,3) = (-(tmp11)) * dy1
lhs_(1,2,3) = 0.
lhs_(1,3,3) = tmp22
lhs_(1,4,3) = 0.
lhs_(1,5,3) = 0.
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * u_(2,3))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,2))
lhs_(2,2,3) = tmp22 * u_(2,3) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dy2
lhs_(2,3,3) = tmp22 * u_(2,2) * tm1
lhs_(2,4,3) = 0.
lhs_(2,5,3) = 0.
lhs_(3,1,3) = tmp22 * ((-(u_(2,3) * u_(2,3) * tm2)) + 0.5
&0d+00 * c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u
&_(2,4)) * tm2)) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,3))
lhs_(3,2,3) = tmp22 * ((-(c2)) * u_(2,2) * tm1)
lhs_(3,3,3) = tmp22 * ((2.0d+00 - c2) * u_(2,3) * tm1) -
&tmp11 * con43 * c3c4 * tm1 - tmp11 * dy3
lhs_(3,4,3) = tmp22 * ((-(c2)) * u_(2,4) * tm1)
lhs_(3,5,3) = tmp22 * c2
lhs_(4,1,3) = tmp22 * ((-(u_(2,3) * u_(2,4))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,4))
lhs_(4,2,3) = 0.
lhs_(4,3,3) = tmp22 * u_(2,4) * tm1
lhs_(4,4,3) = tmp22 * u_(2,3) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dy4
lhs_(4,5,3) = 0.
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * u_(2,5) * tm1) * u_(2
&,3) * tm1) - tmp11 * ((-(c3c4 - c1345)) * tm3 * u_(2,2)** 2 - (con
&43 * c3c4 - c1345) * tm3 * u_(2,3)** 2 - (c3c4 - c1345) * tm3 * u_
&(2,4)** 2 - c1345 * tm2 * u_(2,5))
lhs_(5,2,3) = tmp22 * ((-(c2)) * u_(2,2) * u_(2,3) * tm2)
& - tmp11 * (c3c4 - c1345) * tm2 * u_(2,2)
lhs_(5,3,3) = tmp22 * (c1 * u_(2,5) * tm1 - 0.50d+00 * c2
& * ((u_(2,2) * u_(2,2) + 3.0d+00 * u_(2,3) * u_(2,3) + u_(2,4) * u
&_(2,4)) * tm2)) - tmp11 * (con43 * c3c4 - c1345) * tm2 * u_(2,3)
lhs_(5,4,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,4)) * tm
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,4)
lhs_(5,5,3) = tmp22 * c1 * u_(2,3) * tm1 - tmp11 * c1345
&* tm1 - tmp11 * dy5
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j -
& 1,k) - lhs_(i__0,2,1) * rhs(2,i,j - 1,k) - lhs_(i__0,3,1) * rhs(3
&,i,j - 1,k) - lhs_(i__0,4,1) * rhs(4,i,j - 1,k) - lhs_(i__0,5,1) *
& rhs(5,i,j - 1,k)
enddo
!---------------------------------------------------------------------
!
!---------------------------------------------------------------------
pivot = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot
lhs_(1,3,2) = lhs_(1,3,2) * pivot
lhs_(1,4,2) = lhs_(1,4,2) * pivot
lhs_(1,5,2) = lhs_(1,5,2) * pivot
lhs_(1,1,3) = lhs_(1,1,3) * pivot
lhs_(1,2,3) = lhs_(1,2,3) * pivot
lhs_(1,3,3) = lhs_(1,3,3) * pivot
lhs_(1,4,3) = lhs_(1,4,3) * pivot
lhs_(1,5,3) = lhs_(1,5,3) * pivot
rhs_(1) = rhs_(1) * pivot
coeff = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(1)
coeff = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(1)
coeff = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(1)
coeff = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(1)
pivot = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot
lhs_(2,4,2) = lhs_(2,4,2) * pivot
lhs_(2,5,2) = lhs_(2,5,2) * pivot
lhs_(2,1,3) = lhs_(2,1,3) * pivot
lhs_(2,2,3) = lhs_(2,2,3) * pivot
lhs_(2,3,3) = lhs_(2,3,3) * pivot
lhs_(2,4,3) = lhs_(2,4,3) * pivot
lhs_(2,5,3) = lhs_(2,5,3) * pivot
rhs_(2) = rhs_(2) * pivot
coeff = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(2)
coeff = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(2)
coeff = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(2)
coeff = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(2)
pivot = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot
lhs_(3,5,2) = lhs_(3,5,2) * pivot
lhs_(3,1,3) = lhs_(3,1,3) * pivot
lhs_(3,2,3) = lhs_(3,2,3) * pivot
lhs_(3,3,3) = lhs_(3,3,3) * pivot
lhs_(3,4,3) = lhs_(3,4,3) * pivot
lhs_(3,5,3) = lhs_(3,5,3) * pivot
rhs_(3) = rhs_(3) * pivot
coeff = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(3)
coeff = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(3)
coeff = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(3)
coeff = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(3)
pivot = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot
lhs_(4,1,3) = lhs_(4,1,3) * pivot
lhs_(4,2,3) = lhs_(4,2,3) * pivot
lhs_(4,3,3) = lhs_(4,3,3) * pivot
lhs_(4,4,3) = lhs_(4,4,3) * pivot
lhs_(4,5,3) = lhs_(4,5,3) * pivot
rhs_(4) = rhs_(4) * pivot
coeff = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(4)
coeff = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(4)
coeff = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(4)
coeff = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(4)
pivot = 1.00d0 / lhs_(5,5,2)
lhs_(5,1,3) = lhs_(5,1,3) * pivot
lhs_(5,2,3) = lhs_(5,2,3) * pivot
lhs_(5,3,3) = lhs_(5,3,3) * pivot
lhs_(5,4,3) = lhs_(5,4,3) * pivot
lhs_(5,5,3) = lhs_(5,5,3) * pivot
rhs_(5) = rhs_(5) * pivot
coeff = lhs_(1,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(5)
coeff = lhs_(2,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(5)
coeff = lhs_(3,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(5)
coeff = lhs_(4,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(5)
do i__0 = 1,5
lhs__(i__0,1,i,j,k) = lhs_(i__0,1,3)
lhs__(i__0,2,i,j,k) = lhs_(i__0,2,3)
lhs__(i__0,3,i,j,k) = lhs_(i__0,3,3)
lhs__(i__0,4,i,j,k) = lhs_(i__0,4,3)
lhs__(i__0,5,i,j,k) = lhs_(i__0,5,3)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
u_(0,m) = u_(1,m)
u_(1,m) = u_(2,m)
enddo
enddo
! else ! ******************* else case *************************
do n = 1,5
lhs_(1,n,1) = 0.0d0
lhs_(1,n,2) = 0.0d0
lhs_(1,n,3) = 0.0d0
lhs_(2,n,1) = 0.0d0
lhs_(2,n,2) = 0.0d0
lhs_(2,n,3) = 0.0d0
lhs_(3,n,1) = 0.0d0
lhs_(3,n,2) = 0.0d0
lhs_(3,n,3) = 0.0d0
lhs_(4,n,1) = 0.0d0
lhs_(4,n,2) = 0.0d0
lhs_(4,n,3) = 0.0d0
lhs_(5,n,1) = 0.0d0
lhs_(5,n,2) = 0.0d0
lhs_(5,n,3) = 0.0d0
enddo
do m = 1,5
lhs_(m,m,2) = 1.0d0
enddo
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,jsize
&- 1,k) - lhs_(i__0,2,1) * rhs(2,i,jsize - 1,k) - lhs_(i__0,3,1) *
&rhs(3,i,jsize - 1,k) - lhs_(i__0,4,1) * rhs(4,i,jsize - 1,k) - lhs
&_(i__0,5,1) * rhs(5,i,jsize - 1,k)
enddo
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
enddo
pivot__3 = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
rhs_(1) = rhs_(1) * pivot__3
coeff__2 = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
coeff__2 = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
coeff__2 = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
coeff__2 = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
pivot__3 = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
rhs_(2) = rhs_(2) * pivot__3
coeff__2 = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
coeff__2 = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
coeff__2 = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
coeff__2 = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
pivot__3 = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
rhs_(3) = rhs_(3) * pivot__3
coeff__2 = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
coeff__2 = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
coeff__2 = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
coeff__2 = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
pivot__3 = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
rhs_(4) = rhs_(4) * pivot__3
coeff__2 = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
coeff__2 = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
coeff__2 = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
coeff__2 = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
pivot__3 = 1.00d0 / lhs_(5,5,2)
rhs_(5) = rhs_(5) * pivot__3
coeff__2 = lhs_(1,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
coeff__2 = lhs_(2,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
coeff__2 = lhs_(3,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
coeff__2 = lhs_(4,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
! endif
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
! enddo
do j = problem_size - 2,0,(-(1))
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do m = 1,5
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k) * rhs(1,i,j + 1,k)
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k) * rhs(2,i,j + 1,k)
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k) * rhs(3,i,j + 1,k)
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k) * rhs(4,i,j + 1,k)
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k) * rhs(5,i,j + 1,k)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
enddo
enddo
enddo
!DVM$ end region
return
end

View File

@@ -0,0 +1,635 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! performs guaussian elimination on this cell.
!
! assumes that unpacking routines for non-first cells
! preload C' and rhs' from previous cell.
!
! assumed send happens outside this routine, but that
! c'(JMAX) and rhs'(JMAX) will be sent to next cell
!---------------------------------------------------------------------
subroutine y_solve ()
include 'header3d.h'
double precision coeff
double precision pivot
integer i__0
integer j__1,m,n
double precision coeff__2
double precision pivot__3
double precision lhs_(5,5,3),rhs_(5),u_(0:3,5)
integer i,j,k,jsize,low_k,high_k,k1,maxBlK
jsize = problem_size - 1
if(mod((problem_size - 2), BL) .eq. 0) then
maxBlK = (problem_size - 2) / BL
else
maxblK = (problem_size - 2) / BL + 1
endif
!ACROSS (rhs(0:0,1:0,0:0,0:0),lhs__(0:0,1:0,0:0,0:0,0:0))
!DVM$ region local(lhs__)
!DVM$ PARALLEL (k1,i) ON rhs(*,i,*,k1), private(u_,j,rhs_,pivot,
!DVM$&coeff,tmp1, tmp2, tmp3, t1, t2, t3, tm1, tm2, tm3, tmp11,
!DVM$&tmp22, i__0, j__1,lhs_,coeff__2,pivot__3,m,n,k,low_k,high_k)
!DVM$&,cuda_block(32)
do k1 = 1, maxBlK
do i = 1, problem_size - 2
low_k = (k1 - 1) * BL + 1
high_k = k1 * BL
if(high_k .gt. problem_size - 2) then
high_k = problem_size - 2
endif
do k = low_k, high_k
do m = 1,5
u_(0,m) = u(m,i,0,k)
u_(1,m) = u(m,i,1,k)
enddo
do j = 1,jsize - 1
do m = 1,5
u_(2,m) = u(m,i,j + 1,k)
enddo
! if(j .ne. jsize) then
tmp1 = 1.0d+00 / u_(1,1)
tmp2 = tmp1 * tmp1
tmp3 = tmp1 * tmp2
t1 = 1.0d+00 / u_(0,1)
t2 = t1 * t1
t3 = t1 * t2
tm1 = 1.0d+00 / u_(2,1)
tm2 = tm1 * tm1
tm3 = tm1 * tm2
tmp11 = dt * ty1
tmp22 = dt * ty2
lhs_(1,1,1) = (-(tmp11)) * dy1
lhs_(1,2,1) = 0.
lhs_(1,3,1) = (-(tmp22))
lhs_(1,4,1) = 0.
lhs_(1,5,1) = 0.
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,3))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,2))
lhs_(2,2,1) = (-(tmp22)) * u_(0,3) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dy2
lhs_(2,3,1) = (-(tmp22)) * u_(0,2) * t1
lhs_(2,4,1) = 0.
lhs_(2,5,1) = 0.
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,3) * t2)) +
& 0.50d+00 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
& * u_(0,4)) * t2)) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,3))
lhs_(3,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * t1)
lhs_(3,3,1) = (-(tmp22)) * ((2.0d+00 - c2) * u_(0,3) * t1
&) - tmp11 * con43 * c3c4 * t1 - tmp11 * dy3
lhs_(3,4,1) = (-(tmp22)) * ((-(c2)) * u_(0,4) * t1)
lhs_(3,5,1) = (-(tmp22)) * c2
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,4))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,4))
lhs_(4,2,1) = 0.
lhs_(4,3,1) = (-(tmp22)) * u_(0,4) * t1
lhs_(4,4,1) = (-(tmp22)) * u_(0,3) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dy4
lhs_(4,5,1) = 0.
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * u_(0,5) * t1) * u
&_(0,3) * t1) - tmp11 * ((-(c3c4 - c1345)) * t3 * u_(0,2)** 2 - (co
&n43 * c3c4 - c1345) * t3 * u_(0,3)** 2 - (c3c4 - c1345) * t3 * u_(
&0,4)** 2 - c1345 * t2 * u_(0,5))
lhs_(5,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * u_(0,3) *
& t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,2)
lhs_(5,3,1) = (-(tmp22)) * (c1 * u_(0,5) * t1 - 0.50d+00
&* c2 * ((u_(0,2) * u_(0,2) + 3.0d+00 * u_(0,3) * u_(0,3) + u_(0,4)
& * u_(0,4)) * t2)) - tmp11 * (con43 * c3c4 - c1345) * t2 * u_(0,3)
lhs_(5,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,4))
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,4)
lhs_(5,5,1) = (-(tmp22)) * c1 * u_(0,3) * t1 - tmp11 * c1
&345 * t1 - tmp11 * dy5
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dy1
lhs_(1,2,2) = 0.
lhs_(1,3,2) = 0.
lhs_(1,4,2) = 0.
lhs_(1,5,2) = 0.
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&2))
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dy2
lhs_(2,3,2) = 0.
lhs_(2,4,2) = 0.
lhs_(2,5,2) = 0.
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
& * u_(1,3))
lhs_(3,2,2) = 0.
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3c4 *
&tmp1 + tmp11 * 2.0d+00 * dy3
lhs_(3,4,2) = 0.
lhs_(3,5,2) = 0.
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&4))
lhs_(4,2,2) = 0.
lhs_(4,3,2) = 0.
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dy4
lhs_(4,5,2) = 0.
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(c3c4 - c1345)) * tmp3
& * u_(1,2)** 2 - (con43 * c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (c3
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
lhs_(5,2,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,2)
lhs_(5,3,2) = tmp11 * 2.0d+00 * (con43 * c3c4 - c1345) *
&tmp2 * u_(1,3)
lhs_(5,4,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,4)
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
&tmp11 * 2.0d+00 * dy5
if (j .ne. 1) then
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
&,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
&,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
&,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
&,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
&,3)
enddo
endif
lhs_(1,1,3) = (-(tmp11)) * dy1
lhs_(1,2,3) = 0.
lhs_(1,3,3) = tmp22
lhs_(1,4,3) = 0.
lhs_(1,5,3) = 0.
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * u_(2,3))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,2))
lhs_(2,2,3) = tmp22 * u_(2,3) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dy2
lhs_(2,3,3) = tmp22 * u_(2,2) * tm1
lhs_(2,4,3) = 0.
lhs_(2,5,3) = 0.
lhs_(3,1,3) = tmp22 * ((-(u_(2,3) * u_(2,3) * tm2)) + 0.5
&0d+00 * c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u
&_(2,4)) * tm2)) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,3))
lhs_(3,2,3) = tmp22 * ((-(c2)) * u_(2,2) * tm1)
lhs_(3,3,3) = tmp22 * ((2.0d+00 - c2) * u_(2,3) * tm1) -
&tmp11 * con43 * c3c4 * tm1 - tmp11 * dy3
lhs_(3,4,3) = tmp22 * ((-(c2)) * u_(2,4) * tm1)
lhs_(3,5,3) = tmp22 * c2
lhs_(4,1,3) = tmp22 * ((-(u_(2,3) * u_(2,4))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,4))
lhs_(4,2,3) = 0.
lhs_(4,3,3) = tmp22 * u_(2,4) * tm1
lhs_(4,4,3) = tmp22 * u_(2,3) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dy4
lhs_(4,5,3) = 0.
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * u_(2,5) * tm1) * u_(2
&,3) * tm1) - tmp11 * ((-(c3c4 - c1345)) * tm3 * u_(2,2)** 2 - (con
&43 * c3c4 - c1345) * tm3 * u_(2,3)** 2 - (c3c4 - c1345) * tm3 * u_
&(2,4)** 2 - c1345 * tm2 * u_(2,5))
lhs_(5,2,3) = tmp22 * ((-(c2)) * u_(2,2) * u_(2,3) * tm2)
& - tmp11 * (c3c4 - c1345) * tm2 * u_(2,2)
lhs_(5,3,3) = tmp22 * (c1 * u_(2,5) * tm1 - 0.50d+00 * c2
& * ((u_(2,2) * u_(2,2) + 3.0d+00 * u_(2,3) * u_(2,3) + u_(2,4) * u
&_(2,4)) * tm2)) - tmp11 * (con43 * c3c4 - c1345) * tm2 * u_(2,3)
lhs_(5,4,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,4)) * tm
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,4)
lhs_(5,5,3) = tmp22 * c1 * u_(2,3) * tm1 - tmp11 * c1345
&* tm1 - tmp11 * dy5
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j -
& 1,k) - lhs_(i__0,2,1) * rhs(2,i,j - 1,k) - lhs_(i__0,3,1) * rhs(3
&,i,j - 1,k) - lhs_(i__0,4,1) * rhs(4,i,j - 1,k) - lhs_(i__0,5,1) *
& rhs(5,i,j - 1,k)
enddo
!---------------------------------------------------------------------
!
!---------------------------------------------------------------------
pivot = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot
lhs_(1,3,2) = lhs_(1,3,2) * pivot
lhs_(1,4,2) = lhs_(1,4,2) * pivot
lhs_(1,5,2) = lhs_(1,5,2) * pivot
lhs_(1,1,3) = lhs_(1,1,3) * pivot
lhs_(1,2,3) = lhs_(1,2,3) * pivot
lhs_(1,3,3) = lhs_(1,3,3) * pivot
lhs_(1,4,3) = lhs_(1,4,3) * pivot
lhs_(1,5,3) = lhs_(1,5,3) * pivot
rhs_(1) = rhs_(1) * pivot
coeff = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(1)
coeff = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(1)
coeff = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(1)
coeff = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(1)
pivot = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot
lhs_(2,4,2) = lhs_(2,4,2) * pivot
lhs_(2,5,2) = lhs_(2,5,2) * pivot
lhs_(2,1,3) = lhs_(2,1,3) * pivot
lhs_(2,2,3) = lhs_(2,2,3) * pivot
lhs_(2,3,3) = lhs_(2,3,3) * pivot
lhs_(2,4,3) = lhs_(2,4,3) * pivot
lhs_(2,5,3) = lhs_(2,5,3) * pivot
rhs_(2) = rhs_(2) * pivot
coeff = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(2)
coeff = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(2)
coeff = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(2)
coeff = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(2)
pivot = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot
lhs_(3,5,2) = lhs_(3,5,2) * pivot
lhs_(3,1,3) = lhs_(3,1,3) * pivot
lhs_(3,2,3) = lhs_(3,2,3) * pivot
lhs_(3,3,3) = lhs_(3,3,3) * pivot
lhs_(3,4,3) = lhs_(3,4,3) * pivot
lhs_(3,5,3) = lhs_(3,5,3) * pivot
rhs_(3) = rhs_(3) * pivot
coeff = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(3)
coeff = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(3)
coeff = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(3)
coeff = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(3)
pivot = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot
lhs_(4,1,3) = lhs_(4,1,3) * pivot
lhs_(4,2,3) = lhs_(4,2,3) * pivot
lhs_(4,3,3) = lhs_(4,3,3) * pivot
lhs_(4,4,3) = lhs_(4,4,3) * pivot
lhs_(4,5,3) = lhs_(4,5,3) * pivot
rhs_(4) = rhs_(4) * pivot
coeff = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(4)
coeff = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(4)
coeff = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(4)
coeff = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(4)
pivot = 1.00d0 / lhs_(5,5,2)
lhs_(5,1,3) = lhs_(5,1,3) * pivot
lhs_(5,2,3) = lhs_(5,2,3) * pivot
lhs_(5,3,3) = lhs_(5,3,3) * pivot
lhs_(5,4,3) = lhs_(5,4,3) * pivot
lhs_(5,5,3) = lhs_(5,5,3) * pivot
rhs_(5) = rhs_(5) * pivot
coeff = lhs_(1,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(5)
coeff = lhs_(2,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(5)
coeff = lhs_(3,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(5)
coeff = lhs_(4,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(5)
do i__0 = 1,5
lhs__(i__0,1,i,j,k1) = lhs_(i__0,1,3)
lhs__(i__0,2,i,j,k1) = lhs_(i__0,2,3)
lhs__(i__0,3,i,j,k1) = lhs_(i__0,3,3)
lhs__(i__0,4,i,j,k1) = lhs_(i__0,4,3)
lhs__(i__0,5,i,j,k1) = lhs_(i__0,5,3)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
u_(0,m) = u_(1,m)
u_(1,m) = u_(2,m)
enddo
enddo
! else ! ******************* else case *************************
do n = 1,5
lhs_(1,n,1) = 0.0d0
lhs_(1,n,2) = 0.0d0
lhs_(1,n,3) = 0.0d0
lhs_(2,n,1) = 0.0d0
lhs_(2,n,2) = 0.0d0
lhs_(2,n,3) = 0.0d0
lhs_(3,n,1) = 0.0d0
lhs_(3,n,2) = 0.0d0
lhs_(3,n,3) = 0.0d0
lhs_(4,n,1) = 0.0d0
lhs_(4,n,2) = 0.0d0
lhs_(4,n,3) = 0.0d0
lhs_(5,n,1) = 0.0d0
lhs_(5,n,2) = 0.0d0
lhs_(5,n,3) = 0.0d0
enddo
do m = 1,5
lhs_(m,m,2) = 1.0d0
enddo
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,jsize
&- 1,k) - lhs_(i__0,2,1) * rhs(2,i,jsize - 1,k) - lhs_(i__0,3,1) *
&rhs(3,i,jsize - 1,k) - lhs_(i__0,4,1) * rhs(4,i,jsize - 1,k) - lhs
&_(i__0,5,1) * rhs(5,i,jsize - 1,k)
enddo
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
enddo
pivot__3 = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
rhs_(1) = rhs_(1) * pivot__3
coeff__2 = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
coeff__2 = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
coeff__2 = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
coeff__2 = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
pivot__3 = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
rhs_(2) = rhs_(2) * pivot__3
coeff__2 = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
coeff__2 = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
coeff__2 = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
coeff__2 = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
pivot__3 = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
rhs_(3) = rhs_(3) * pivot__3
coeff__2 = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
coeff__2 = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
coeff__2 = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
coeff__2 = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
pivot__3 = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
rhs_(4) = rhs_(4) * pivot__3
coeff__2 = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
coeff__2 = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
coeff__2 = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
coeff__2 = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
pivot__3 = 1.00d0 / lhs_(5,5,2)
rhs_(5) = rhs_(5) * pivot__3
coeff__2 = lhs_(1,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
coeff__2 = lhs_(2,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
coeff__2 = lhs_(3,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
coeff__2 = lhs_(4,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
! endif
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
! enddo
do j = problem_size - 2,0,(-(1))
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do m = 1,5
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k1) * rhs(1,i,j + 1,k)
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k1) * rhs(2,i,j + 1,k)
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k1) * rhs(3,i,j + 1,k)
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k1) * rhs(4,i,j + 1,k)
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k1) * rhs(5,i,j + 1,k)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
enddo
enddo
enddo
enddo
!DVM$ end region
return
end

View File

@@ -0,0 +1,634 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! performs guaussian elimination on this cell.
!
! assumes that unpacking routines for non-first cells
! preload C' and rhs' from previous cell.
!
! assumed send happens outside this routine, but that
! c'(JMAX) and rhs'(JMAX) will be sent to next cell
!---------------------------------------------------------------------
subroutine y_solve ()
include 'header3d.h'
double precision coeff
double precision pivot
integer i__0
integer j__1,m,n
double precision coeff__2
double precision pivot__3
double precision lhs_(5,5,3),rhs_(5),rhsp_(5),u_(0:3,5)
integer i,j,k,jsize,jstart
jstart = 0
jsize = problem_size - 1
!DVM$ region local(lhs__)
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), private(u_,rhs_,pivot,
!DVM$&coeff,tmp1, tmp2, tmp3, t1, t2, t3, tm1, tm2, tm3, tmp11,
!DVM$&tmp22, i__0, j__1,lhs_,coeff__2,pivot__3,m,n),stage(stage_n),
!DVM$& ACROSS(rhs(0:0,0:0,1:0,0:0),lhs__(0:0,0:0,0:0,1:0,0:0))
do k = 1,problem_size - 2
do j = 1,jsize - 1
do i = 1,problem_size - 2
if(j .ne. jsize) then
do m = 1,5
lhs_(m,1,3) = lhs__(m,1,i,j-1,k)
lhs_(m,2,3) = lhs__(m,2,i,j-1,k)
lhs_(m,3,3) = lhs__(m,3,i,j-1,k)
lhs_(m,4,3) = lhs__(m,4,i,j-1,k)
lhs_(m,5,3) = lhs__(m,5,i,j-1,k)
u_(0,m) = u(m,i,j-1,k)
u_(1,m) = u(m,i,j,k)
u_(2,m) = u(m,i,j+1,k)
enddo
tmp1 = 1.0d+00 / u_(1,1)
tmp2 = tmp1 * tmp1
tmp3 = tmp1 * tmp2
t1 = 1.0d+00 / u_(0,1)
t2 = t1 * t1
t3 = t1 * t2
tm1 = 1.0d+00 / u_(2,1)
tm2 = tm1 * tm1
tm3 = tm1 * tm2
tmp11 = dt * ty1
tmp22 = dt * ty2
lhs_(1,1,1) = (-(tmp11)) * dy1
lhs_(1,2,1) = 0.
lhs_(1,3,1) = (-(tmp22))
lhs_(1,4,1) = 0.
lhs_(1,5,1) = 0.
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,3))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,2))
lhs_(2,2,1) = (-(tmp22)) * u_(0,3) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dy2
lhs_(2,3,1) = (-(tmp22)) * u_(0,2) * t1
lhs_(2,4,1) = 0.
lhs_(2,5,1) = 0.
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,3) * t2)) +
& 0.50d+00 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
& * u_(0,4)) * t2)) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,3))
lhs_(3,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * t1)
lhs_(3,3,1) = (-(tmp22)) * ((2.0d+00 - c2) * u_(0,3) * t1
&) - tmp11 * con43 * c3c4 * t1 - tmp11 * dy3
lhs_(3,4,1) = (-(tmp22)) * ((-(c2)) * u_(0,4) * t1)
lhs_(3,5,1) = (-(tmp22)) * c2
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,4))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,4))
lhs_(4,2,1) = 0.
lhs_(4,3,1) = (-(tmp22)) * u_(0,4) * t1
lhs_(4,4,1) = (-(tmp22)) * u_(0,3) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dy4
lhs_(4,5,1) = 0.
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * u_(0,5) * t1) * u
&_(0,3) * t1) - tmp11 * ((-(c3c4 - c1345)) * t3 * u_(0,2)** 2 - (co
&n43 * c3c4 - c1345) * t3 * u_(0,3)** 2 - (c3c4 - c1345) * t3 * u_(
&0,4)** 2 - c1345 * t2 * u_(0,5))
lhs_(5,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * u_(0,3) *
& t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,2)
lhs_(5,3,1) = (-(tmp22)) * (c1 * u_(0,5) * t1 - 0.50d+00
&* c2 * ((u_(0,2) * u_(0,2) + 3.0d+00 * u_(0,3) * u_(0,3) + u_(0,4)
& * u_(0,4)) * t2)) - tmp11 * (con43 * c3c4 - c1345) * t2 * u_(0,3)
lhs_(5,4,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,4))
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,4)
lhs_(5,5,1) = (-(tmp22)) * c1 * u_(0,3) * t1 - tmp11 * c1
&345 * t1 - tmp11 * dy5
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dy1
lhs_(1,2,2) = 0.
lhs_(1,3,2) = 0.
lhs_(1,4,2) = 0.
lhs_(1,5,2) = 0.
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&2))
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dy2
lhs_(2,3,2) = 0.
lhs_(2,4,2) = 0.
lhs_(2,5,2) = 0.
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
& * u_(1,3))
lhs_(3,2,2) = 0.
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3c4 *
&tmp1 + tmp11 * 2.0d+00 * dy3
lhs_(3,4,2) = 0.
lhs_(3,5,2) = 0.
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&4))
lhs_(4,2,2) = 0.
lhs_(4,3,2) = 0.
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dy4
lhs_(4,5,2) = 0.
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(c3c4 - c1345)) * tmp3
& * u_(1,2)** 2 - (con43 * c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (c3
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
lhs_(5,2,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,2)
lhs_(5,3,2) = tmp11 * 2.0d+00 * (con43 * c3c4 - c1345) *
&tmp2 * u_(1,3)
lhs_(5,4,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,4)
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
&tmp11 * 2.0d+00 * dy5
if (j .ne. 1) then
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
&,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
&,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
&,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
&,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
&,3)
enddo
endif
lhs_(1,1,3) = (-(tmp11)) * dy1
lhs_(1,2,3) = 0.
lhs_(1,3,3) = tmp22
lhs_(1,4,3) = 0.
lhs_(1,5,3) = 0.
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * u_(2,3))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,2))
lhs_(2,2,3) = tmp22 * u_(2,3) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dy2
lhs_(2,3,3) = tmp22 * u_(2,2) * tm1
lhs_(2,4,3) = 0.
lhs_(2,5,3) = 0.
lhs_(3,1,3) = tmp22 * ((-(u_(2,3) * u_(2,3) * tm2)) + 0.5
&0d+00 * c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u
&_(2,4)) * tm2)) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,3))
lhs_(3,2,3) = tmp22 * ((-(c2)) * u_(2,2) * tm1)
lhs_(3,3,3) = tmp22 * ((2.0d+00 - c2) * u_(2,3) * tm1) -
&tmp11 * con43 * c3c4 * tm1 - tmp11 * dy3
lhs_(3,4,3) = tmp22 * ((-(c2)) * u_(2,4) * tm1)
lhs_(3,5,3) = tmp22 * c2
lhs_(4,1,3) = tmp22 * ((-(u_(2,3) * u_(2,4))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,4))
lhs_(4,2,3) = 0.
lhs_(4,3,3) = tmp22 * u_(2,4) * tm1
lhs_(4,4,3) = tmp22 * u_(2,3) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dy4
lhs_(4,5,3) = 0.
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * u_(2,5) * tm1) * u_(2
&,3) * tm1) - tmp11 * ((-(c3c4 - c1345)) * tm3 * u_(2,2)** 2 - (con
&43 * c3c4 - c1345) * tm3 * u_(2,3)** 2 - (c3c4 - c1345) * tm3 * u_
&(2,4)** 2 - c1345 * tm2 * u_(2,5))
lhs_(5,2,3) = tmp22 * ((-(c2)) * u_(2,2) * u_(2,3) * tm2)
& - tmp11 * (c3c4 - c1345) * tm2 * u_(2,2)
lhs_(5,3,3) = tmp22 * (c1 * u_(2,5) * tm1 - 0.50d+00 * c2
& * ((u_(2,2) * u_(2,2) + 3.0d+00 * u_(2,3) * u_(2,3) + u_(2,4) * u
&_(2,4)) * tm2)) - tmp11 * (con43 * c3c4 - c1345) * tm2 * u_(2,3)
lhs_(5,4,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,4)) * tm
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,4)
lhs_(5,5,3) = tmp22 * c1 * u_(2,3) * tm1 - tmp11 * c1345
&* tm1 - tmp11 * dy5
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j -
& 1,k) - lhs_(i__0,2,1) * rhs(2,i,j - 1,k) - lhs_(i__0,3,1) * rhs(3
&,i,j - 1,k) - lhs_(i__0,4,1) * rhs(4,i,j - 1,k) - lhs_(i__0,5,1) *
& rhs(5,i,j - 1,k)
enddo
!---------------------------------------------------------------------
!
!---------------------------------------------------------------------
pivot = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot
lhs_(1,3,2) = lhs_(1,3,2) * pivot
lhs_(1,4,2) = lhs_(1,4,2) * pivot
lhs_(1,5,2) = lhs_(1,5,2) * pivot
lhs_(1,1,3) = lhs_(1,1,3) * pivot
lhs_(1,2,3) = lhs_(1,2,3) * pivot
lhs_(1,3,3) = lhs_(1,3,3) * pivot
lhs_(1,4,3) = lhs_(1,4,3) * pivot
lhs_(1,5,3) = lhs_(1,5,3) * pivot
rhs_(1) = rhs_(1) * pivot
coeff = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(1)
coeff = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(1)
coeff = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(1)
coeff = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(1)
pivot = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot
lhs_(2,4,2) = lhs_(2,4,2) * pivot
lhs_(2,5,2) = lhs_(2,5,2) * pivot
lhs_(2,1,3) = lhs_(2,1,3) * pivot
lhs_(2,2,3) = lhs_(2,2,3) * pivot
lhs_(2,3,3) = lhs_(2,3,3) * pivot
lhs_(2,4,3) = lhs_(2,4,3) * pivot
lhs_(2,5,3) = lhs_(2,5,3) * pivot
rhs_(2) = rhs_(2) * pivot
coeff = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(2)
coeff = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(2)
coeff = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(2)
coeff = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(2)
pivot = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot
lhs_(3,5,2) = lhs_(3,5,2) * pivot
lhs_(3,1,3) = lhs_(3,1,3) * pivot
lhs_(3,2,3) = lhs_(3,2,3) * pivot
lhs_(3,3,3) = lhs_(3,3,3) * pivot
lhs_(3,4,3) = lhs_(3,4,3) * pivot
lhs_(3,5,3) = lhs_(3,5,3) * pivot
rhs_(3) = rhs_(3) * pivot
coeff = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(3)
coeff = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(3)
coeff = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(3)
coeff = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(3)
pivot = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot
lhs_(4,1,3) = lhs_(4,1,3) * pivot
lhs_(4,2,3) = lhs_(4,2,3) * pivot
lhs_(4,3,3) = lhs_(4,3,3) * pivot
lhs_(4,4,3) = lhs_(4,4,3) * pivot
lhs_(4,5,3) = lhs_(4,5,3) * pivot
rhs_(4) = rhs_(4) * pivot
coeff = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(4)
coeff = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(4)
coeff = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(4)
coeff = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(4)
pivot = 1.00d0 / lhs_(5,5,2)
lhs_(5,1,3) = lhs_(5,1,3) * pivot
lhs_(5,2,3) = lhs_(5,2,3) * pivot
lhs_(5,3,3) = lhs_(5,3,3) * pivot
lhs_(5,4,3) = lhs_(5,4,3) * pivot
lhs_(5,5,3) = lhs_(5,5,3) * pivot
rhs_(5) = rhs_(5) * pivot
coeff = lhs_(1,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(5)
coeff = lhs_(2,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(5)
coeff = lhs_(3,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(5)
coeff = lhs_(4,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(5)
do i__0 = 1,5
lhs__(i__0,1,i,j,k) = lhs_(i__0,1,3)
lhs__(i__0,2,i,j,k) = lhs_(i__0,2,3)
lhs__(i__0,3,i,j,k) = lhs_(i__0,3,3)
lhs__(i__0,4,i,j,k) = lhs_(i__0,4,3)
lhs__(i__0,5,i,j,k) = lhs_(i__0,5,3)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
! enddo
else ! ******************* else case *************************
do n = 1,5
lhs_(1,n,1) = 0.0d0
lhs_(1,n,2) = 0.0d0
lhs_(1,n,3) = 0.0d0
lhs_(2,n,1) = 0.0d0
lhs_(2,n,2) = 0.0d0
lhs_(2,n,3) = 0.0d0
lhs_(3,n,1) = 0.0d0
lhs_(3,n,2) = 0.0d0
lhs_(3,n,3) = 0.0d0
lhs_(4,n,1) = 0.0d0
lhs_(4,n,2) = 0.0d0
lhs_(4,n,3) = 0.0d0
lhs_(5,n,1) = 0.0d0
lhs_(5,n,2) = 0.0d0
lhs_(5,n,3) = 0.0d0
enddo
do m = 1,5
lhs_(m,m,2) = 1.0d0
enddo
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,jsize
&- 1,k) - lhs_(i__0,2,1) * rhs(2,i,jsize - 1,k) - lhs_(i__0,3,1) *
&rhs(3,i,jsize - 1,k) - lhs_(i__0,4,1) * rhs(4,i,jsize - 1,k) - lhs
&_(i__0,5,1) * rhs(5,i,jsize - 1,k)
enddo
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
enddo
pivot__3 = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
rhs_(1) = rhs_(1) * pivot__3
coeff__2 = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
coeff__2 = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
coeff__2 = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
coeff__2 = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
pivot__3 = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
rhs_(2) = rhs_(2) * pivot__3
coeff__2 = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
coeff__2 = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
coeff__2 = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
coeff__2 = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
pivot__3 = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
rhs_(3) = rhs_(3) * pivot__3
coeff__2 = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
coeff__2 = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
coeff__2 = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
coeff__2 = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
pivot__3 = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
rhs_(4) = rhs_(4) * pivot__3
coeff__2 = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
coeff__2 = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
coeff__2 = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
coeff__2 = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
pivot__3 = 1.00d0 / lhs_(5,5,2)
rhs_(5) = rhs_(5) * pivot__3
coeff__2 = lhs_(1,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
coeff__2 = lhs_(2,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
coeff__2 = lhs_(3,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
coeff__2 = lhs_(4,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
endif
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
enddo
enddo
enddo
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), private(m, rhs_,rhsp_)
!DVM$& ,ACROSS(rhs(0:0,0:0,0:1,0:0)),stage(stage_n)
do k = 1,problem_size - 2
do j = problem_size - 2,0,(-(1))
do i = 1,problem_size - 2
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
rhsp_(m) = rhs(m,i,j+1,k)
enddo
do m = 1,5
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k) * rhsp_(1)
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k) * rhsp_(2)
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k) * rhsp_(3)
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k) * rhsp_(4)
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k) * rhsp_(5)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
enddo
enddo
enddo
!DVM$ end region
return
end

View File

@@ -0,0 +1,623 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! performs guaussian elimination on this cell.
!
! assumes that unpacking routines for non-first cells
! preload C' and rhs' from previous cell.
!
! assumed send happens outside this routine, but that
! c'(KMAX) and rhs'(KMAX) will be sent to next cell.
!---------------------------------------------------------------------
subroutine z_solve ()
include 'header3d.h'
double precision coeff
double precision pivot
integer i__0
integer j__1,m,n
double precision coeff__2
double precision pivot__3
double precision lhs_(5,5,3),rhs_(5),rhsp_(5),u_(0:3,5)
integer i,j,k,ksize, k1
ksize = problem_size - 1
!DVM$ region local(lhs__)
!DVM$ PARALLEL (j,i) ON rhs(*,i,j,*), private(k,u_,rhs_,pivot,
!DVM$& coeff, tmp1, tmp2, tmp3, t1, t2, t3, tm1, tm2, tm3,rhsp_,
!DVM$& tmp11, tmp22, i__0, j__1,lhs_,n,m,pivot__3,coeff__2)
do j = 1,problem_size - 2
do i = 1,problem_size - 2
do m = 1,5
u_(0,m) = u(m,i,j,0)
u_(1,m) = u(m,i,j,1)
enddo
do k = 1,ksize - 1
do m = 1,5
u_(2,m) = u(m,i,j,k + 1)
enddo
tmp1 = 1.0d+00 / u_(1,1)
tmp2 = tmp1 * tmp1
tmp3 = tmp1 * tmp2
t1 = 1.0d+00 / u_(0,1)
t2 = t1 * t1
t3 = t1 * t2
tm1 = 1.0d+00 / u_(2,1)
tm2 = tm1 * tm1
tm3 = tm1 * tm2
tmp11 = dt * tz1
tmp22 = dt * tz2
lhs_(1,1,1) = (-(tmp11)) * dz1
lhs_(1,2,1) = 0.
lhs_(1,3,1) = 0.
lhs_(1,4,1) = (-(tmp22))
lhs_(1,5,1) = 0.
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,4))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,2))
lhs_(2,2,1) = (-(tmp22)) * u_(0,4) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dz2
lhs_(2,3,1) = 0.
lhs_(2,4,1) = (-(tmp22)) * u_(0,2) * t1
lhs_(2,5,1) = 0.
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,4))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,3))
lhs_(3,2,1) = 0.
lhs_(3,3,1) = (-(tmp22)) * u_(0,4) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dz3
lhs_(3,4,1) = (-(tmp22)) * u_(0,3) * t1
lhs_(3,5,1) = 0.
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,4) * u_(0,4) * t2)) +
& 0.50d+00 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
& * u_(0,4)) * t2)) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,4))
lhs_(4,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * t1)
lhs_(4,3,1) = (-(tmp22)) * ((-(c2)) * u_(0,3) * t1)
lhs_(4,4,1) = (-(tmp22)) * (2.0d+00 - c2) * u_(0,4) * t1
&- tmp11 * con43 * c3 * c4 * t1 - tmp11 * dz4
lhs_(4,5,1) = (-(tmp22)) * c2
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * (u_(0,5) * t1)) *
& (u_(0,4) * t1)) - tmp11 * ((-(c3c4 - c1345)) * t3 * u_(0,2)** 2 -
& (c3c4 - c1345) * t3 * u_(0,3)** 2 - (con43 * c3c4 - c1345) * t3 *
& u_(0,4)** 2 - c1345 * t2 * u_(0,5))
lhs_(5,2,1) = (-(tmp22)) * ((-(c2)) * (u_(0,2) * u_(0,4))
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,2)
lhs_(5,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,4))
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,3)
lhs_(5,4,1) = (-(tmp22)) * (c1 * (u_(0,5) * t1) - 0.50d+0
&0 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + 3.0d+00 * u_(0,
&4) * u_(0,4)) * t2)) - tmp11 * (con43 * c3c4 - c1345) * t2 * u_(0,
&4)
lhs_(5,5,1) = (-(tmp22)) * c1 * u_(0,4) * t1 - tmp11 * c1
&345 * t1 - tmp11 * dz5
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dz1
lhs_(1,2,2) = 0.
lhs_(1,3,2) = 0.
lhs_(1,4,2) = 0.
lhs_(1,5,2) = 0.
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&2))
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dz2
lhs_(2,3,2) = 0.
lhs_(2,4,2) = 0.
lhs_(2,5,2) = 0.
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&3))
lhs_(3,2,2) = 0.
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dz3
lhs_(3,4,2) = 0.
lhs_(3,5,2) = 0.
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
& * u_(1,4))
lhs_(4,2,2) = 0.
lhs_(4,3,2) = 0.
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3 * c4
& * tmp1 + tmp11 * 2.0d+00 * dz4
lhs_(4,5,2) = 0.
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(c3c4 - c1345)) * tmp3
& * u_(1,2)** 2 - (c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (con43 * c3
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
lhs_(5,2,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,2)
lhs_(5,3,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,3)
lhs_(5,4,2) = tmp11 * 2.0d+00 * (con43 * c3c4 - c1345) *
&tmp2 * u_(1,4)
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
&tmp11 * 2.0d+00 * dz5
if (k .ne. 1) then
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
&,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
&,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
&,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
&,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
&,3)
enddo
endif
lhs_(1,1,3) = (-(tmp11)) * dz1
lhs_(1,2,3) = 0.
lhs_(1,3,3) = 0.
lhs_(1,4,3) = tmp22
lhs_(1,5,3) = 0.
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * u_(2,4))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,2))
lhs_(2,2,3) = tmp22 * u_(2,4) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dz2
lhs_(2,3,3) = 0.
lhs_(2,4,3) = tmp22 * u_(2,2) * tm1
lhs_(2,5,3) = 0.
lhs_(3,1,3) = tmp22 * ((-(u_(2,3) * u_(2,4))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,3))
lhs_(3,2,3) = 0.
lhs_(3,3,3) = tmp22 * u_(2,4) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dz3
lhs_(3,4,3) = tmp22 * u_(2,3) * tm1
lhs_(3,5,3) = 0.
lhs_(4,1,3) = tmp22 * ((-(u_(2,4) * u_(2,4) * tm2)) + 0.5
&0d+00 * c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u
&_(2,4)) * tm2)) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,4))
lhs_(4,2,3) = tmp22 * ((-(c2)) * u_(2,2) * tm1)
lhs_(4,3,3) = tmp22 * ((-(c2)) * u_(2,3) * tm1)
lhs_(4,4,3) = tmp22 * (2.0d+00 - c2) * u_(2,4) * tm1 - tm
&p11 * con43 * c3 * c4 * tm1 - tmp11 * dz4
lhs_(4,5,3) = tmp22 * c2
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * (u_(2,5) * tm1)) * (u
&_(2,4) * tm1)) - tmp11 * ((-(c3c4 - c1345)) * tm3 * u_(2,2)** 2 -
&(c3c4 - c1345) * tm3 * u_(2,3)** 2 - (con43 * c3c4 - c1345) * tm3
&* u_(2,4)** 2 - c1345 * tm2 * u_(2,5))
lhs_(5,2,3) = tmp22 * ((-(c2)) * (u_(2,2) * u_(2,4)) * tm
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,2)
lhs_(5,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,4)) * tm
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,3)
lhs_(5,4,3) = tmp22 * (c1 * (u_(2,5) * tm1) - 0.50d+00 *
&c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + 3.0d+00 * u_(2,4) *
& u_(2,4)) * tm2)) - tmp11 * (con43 * c3c4 - c1345) * tm2 * u_(2,4)
lhs_(5,5,3) = tmp22 * c1 * u_(2,4) * tm1 - tmp11 * c1345
&* tm1 - tmp11 * dz5
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j,k
& - 1) - lhs_(i__0,2,1) * rhs(2,i,j,k - 1) - lhs_(i__0,3,1) * rhs(3
&,i,j,k - 1) - lhs_(i__0,4,1) * rhs(4,i,j,k - 1) - lhs_(i__0,5,1) *
& rhs(5,i,j,k - 1)
enddo
!---------------------------------------------------------------------
!
!---------------------------------------------------------------------
pivot = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot
lhs_(1,3,2) = lhs_(1,3,2) * pivot
lhs_(1,4,2) = lhs_(1,4,2) * pivot
lhs_(1,5,2) = lhs_(1,5,2) * pivot
lhs_(1,1,3) = lhs_(1,1,3) * pivot
lhs_(1,2,3) = lhs_(1,2,3) * pivot
lhs_(1,3,3) = lhs_(1,3,3) * pivot
lhs_(1,4,3) = lhs_(1,4,3) * pivot
lhs_(1,5,3) = lhs_(1,5,3) * pivot
rhs_(1) = rhs_(1) * pivot
coeff = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(1)
coeff = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(1)
coeff = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(1)
coeff = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(1)
pivot = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot
lhs_(2,4,2) = lhs_(2,4,2) * pivot
lhs_(2,5,2) = lhs_(2,5,2) * pivot
lhs_(2,1,3) = lhs_(2,1,3) * pivot
lhs_(2,2,3) = lhs_(2,2,3) * pivot
lhs_(2,3,3) = lhs_(2,3,3) * pivot
lhs_(2,4,3) = lhs_(2,4,3) * pivot
lhs_(2,5,3) = lhs_(2,5,3) * pivot
rhs_(2) = rhs_(2) * pivot
coeff = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(2)
coeff = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(2)
coeff = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(2)
coeff = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(2)
pivot = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot
lhs_(3,5,2) = lhs_(3,5,2) * pivot
lhs_(3,1,3) = lhs_(3,1,3) * pivot
lhs_(3,2,3) = lhs_(3,2,3) * pivot
lhs_(3,3,3) = lhs_(3,3,3) * pivot
lhs_(3,4,3) = lhs_(3,4,3) * pivot
lhs_(3,5,3) = lhs_(3,5,3) * pivot
rhs_(3) = rhs_(3) * pivot
coeff = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(3)
coeff = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(3)
coeff = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(3)
coeff = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(3)
pivot = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot
lhs_(4,1,3) = lhs_(4,1,3) * pivot
lhs_(4,2,3) = lhs_(4,2,3) * pivot
lhs_(4,3,3) = lhs_(4,3,3) * pivot
lhs_(4,4,3) = lhs_(4,4,3) * pivot
lhs_(4,5,3) = lhs_(4,5,3) * pivot
rhs_(4) = rhs_(4) * pivot
coeff = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(4)
coeff = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(4)
coeff = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(4)
coeff = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(4)
pivot = 1.00d0 / lhs_(5,5,2)
lhs_(5,1,3) = lhs_(5,1,3) * pivot
lhs_(5,2,3) = lhs_(5,2,3) * pivot
lhs_(5,3,3) = lhs_(5,3,3) * pivot
lhs_(5,4,3) = lhs_(5,4,3) * pivot
lhs_(5,5,3) = lhs_(5,5,3) * pivot
rhs_(5) = rhs_(5) * pivot
coeff = lhs_(1,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(5)
coeff = lhs_(2,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(5)
coeff = lhs_(3,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(5)
coeff = lhs_(4,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(5)
do i__0 = 1,5
lhs__(i__0,1,i,j,k) = lhs_(i__0,1,3)
lhs__(i__0,2,i,j,k) = lhs_(i__0,2,3)
lhs__(i__0,3,i,j,k) = lhs_(i__0,3,3)
lhs__(i__0,4,i,j,k) = lhs_(i__0,4,3)
lhs__(i__0,5,i,j,k) = lhs_(i__0,5,3)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
u_(0,m) = u_(1,m)
u_(1,m) = u_(2,m)
enddo
enddo
do n = 1,5
lhs_(1,n,1) = 0.0d0
lhs_(1,n,2) = 0.0d0
lhs_(1,n,3) = 0.0d0
lhs_(2,n,1) = 0.0d0
lhs_(2,n,2) = 0.0d0
lhs_(2,n,3) = 0.0d0
lhs_(3,n,1) = 0.0d0
lhs_(3,n,2) = 0.0d0
lhs_(3,n,3) = 0.0d0
lhs_(4,n,1) = 0.0d0
lhs_(4,n,2) = 0.0d0
lhs_(4,n,3) = 0.0d0
lhs_(5,n,1) = 0.0d0
lhs_(5,n,2) = 0.0d0
lhs_(5,n,3) = 0.0d0
enddo
do m = 1,5
lhs_(m,m,2) = 1.0d0
enddo
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j,ksiz
&e - 1) - lhs_(i__0,2,1) * rhs(2,i,j,ksize - 1) - lhs_(i__0,3,1) *
&rhs(3,i,j,ksize - 1) - lhs_(i__0,4,1) * rhs(4,i,j,ksize - 1) - lhs
&_(i__0,5,1) * rhs(5,i,j,ksize - 1)
enddo
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
enddo
!---------------------------------------------------------------------
!
!---------------------------------------------------------------------
pivot__3 = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
rhs_(1) = rhs_(1) * pivot__3
coeff__2 = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
coeff__2 = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
coeff__2 = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
coeff__2 = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
pivot__3 = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
rhs_(2) = rhs_(2) * pivot__3
coeff__2 = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
coeff__2 = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
coeff__2 = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
coeff__2 = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
pivot__3 = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
rhs_(3) = rhs_(3) * pivot__3
coeff__2 = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
coeff__2 = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
coeff__2 = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
coeff__2 = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
pivot__3 = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
rhs_(4) = rhs_(4) * pivot__3
coeff__2 = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
coeff__2 = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
coeff__2 = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
coeff__2 = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
pivot__3 = 1.00d0 / lhs_(5,5,2)
rhs_(5) = rhs_(5) * pivot__3
coeff__2 = lhs_(1,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
coeff__2 = lhs_(2,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
coeff__2 = lhs_(3,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
coeff__2 = lhs_(4,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
k = ksize-1
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
rhsp_(m) = rhs(m,i,j,k + 1)
enddo
do k = ksize-1, 1, (-(1))
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do m = 1,5
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k) * rhsp_(1)
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k) * rhsp_(2)
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k) * rhsp_(3)
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k) * rhsp_(4)
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k) * rhsp_(5)
enddo
do m = 1,5
rhsp_(m) = rhs_(m)
u(m,i,j,k) = u(m,i,j,k) + rhs_(m)
enddo
enddo
enddo
enddo
!DVM$ end region
return
end

View File

@@ -0,0 +1,636 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! performs guaussian elimination on this cell.
!
! assumes that unpacking routines for non-first cells
! preload C' and rhs' from previous cell.
!
! assumed send happens outside this routine, but that
! c'(KMAX) and rhs'(KMAX) will be sent to next cell.
!---------------------------------------------------------------------
subroutine z_solve ()
include 'header3d.h'
double precision coeff
double precision pivot
integer i__0
integer j__1,m,n
double precision coeff__2
double precision pivot__3
double precision lhs_(5,5,3),rhs_(5),rhsp_(5),u_(0:3,5)
integer i,j,k,ksize,j1,maxblJ,low_j,high_j
ksize = problem_size - 1
if(mod((problem_size - 2), BL) .eq. 0) then
maxBlJ = (problem_size - 2) / BL
else
maxblJ = (problem_size - 2) / BL + 1
endif
!DVM$ region local(lhs__)
!DVM$ PARALLEL (j1,i) ON rhs(*,i,j1,*), private(k,u_,rhs_,pivot,
!DVM$& coeff, tmp1, tmp2, tmp3, t1, t2, t3, tm1, tm2, tm3,rhsp_,
!DVM$& tmp11, tmp22, i__0, j__1,lhs_,n,m,pivot__3,
!DVM$& coeff__2,j,low_j,high_j),cuda_block(32)
do j1 = 1, maxBlJ
do i = 1, problem_size - 2
low_j = (j1 - 1) * BL + 1
high_j = j1 * BL
if(high_j .gt. problem_size - 2) then
high_j = problem_size - 2
endif
do j = low_j, high_j
do m = 1,5
u_(0,m) = u(m,i,j,0)
u_(1,m) = u(m,i,j,1)
enddo
do k = 1,ksize - 1
do m = 1,5
u_(2,m) = u(m,i,j,k + 1)
enddo
tmp1 = 1.0d+00 / u_(1,1)
tmp2 = tmp1 * tmp1
tmp3 = tmp1 * tmp2
t1 = 1.0d+00 / u_(0,1)
t2 = t1 * t1
t3 = t1 * t2
tm1 = 1.0d+00 / u_(2,1)
tm2 = tm1 * tm1
tm3 = tm1 * tm2
tmp11 = dt * tz1
tmp22 = dt * tz2
lhs_(1,1,1) = (-(tmp11)) * dz1
lhs_(1,2,1) = 0.
lhs_(1,3,1) = 0.
lhs_(1,4,1) = (-(tmp22))
lhs_(1,5,1) = 0.
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,4))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,2))
lhs_(2,2,1) = (-(tmp22)) * u_(0,4) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dz2
lhs_(2,3,1) = 0.
lhs_(2,4,1) = (-(tmp22)) * u_(0,2) * t1
lhs_(2,5,1) = 0.
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,4))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,3))
lhs_(3,2,1) = 0.
lhs_(3,3,1) = (-(tmp22)) * u_(0,4) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dz3
lhs_(3,4,1) = (-(tmp22)) * u_(0,3) * t1
lhs_(3,5,1) = 0.
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,4) * u_(0,4) * t2)) +
& 0.50d+00 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
& * u_(0,4)) * t2)) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,4))
lhs_(4,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * t1)
lhs_(4,3,1) = (-(tmp22)) * ((-(c2)) * u_(0,3) * t1)
lhs_(4,4,1) = (-(tmp22)) * (2.0d+00 - c2) * u_(0,4) * t1
&- tmp11 * con43 * c3 * c4 * t1 - tmp11 * dz4
lhs_(4,5,1) = (-(tmp22)) * c2
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * (u_(0,5) * t1)) *
& (u_(0,4) * t1)) - tmp11 * ((-(c3c4 - c1345)) * t3 * u_(0,2)** 2 -
& (c3c4 - c1345) * t3 * u_(0,3)** 2 - (con43 * c3c4 - c1345) * t3 *
& u_(0,4)** 2 - c1345 * t2 * u_(0,5))
lhs_(5,2,1) = (-(tmp22)) * ((-(c2)) * (u_(0,2) * u_(0,4))
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,2)
lhs_(5,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,4))
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,3)
lhs_(5,4,1) = (-(tmp22)) * (c1 * (u_(0,5) * t1) - 0.50d+0
&0 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + 3.0d+00 * u_(0,
&4) * u_(0,4)) * t2)) - tmp11 * (con43 * c3c4 - c1345) * t2 * u_(0,
&4)
lhs_(5,5,1) = (-(tmp22)) * c1 * u_(0,4) * t1 - tmp11 * c1
&345 * t1 - tmp11 * dz5
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dz1
lhs_(1,2,2) = 0.
lhs_(1,3,2) = 0.
lhs_(1,4,2) = 0.
lhs_(1,5,2) = 0.
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&2))
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dz2
lhs_(2,3,2) = 0.
lhs_(2,4,2) = 0.
lhs_(2,5,2) = 0.
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&3))
lhs_(3,2,2) = 0.
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dz3
lhs_(3,4,2) = 0.
lhs_(3,5,2) = 0.
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
& * u_(1,4))
lhs_(4,2,2) = 0.
lhs_(4,3,2) = 0.
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3 * c4
& * tmp1 + tmp11 * 2.0d+00 * dz4
lhs_(4,5,2) = 0.
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(c3c4 - c1345)) * tmp3
& * u_(1,2)** 2 - (c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (con43 * c3
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
lhs_(5,2,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,2)
lhs_(5,3,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,3)
lhs_(5,4,2) = tmp11 * 2.0d+00 * (con43 * c3c4 - c1345) *
&tmp2 * u_(1,4)
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
&tmp11 * 2.0d+00 * dz5
if (k .ne. 1) then
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
&,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
&,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
&,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
&,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
&,3)
enddo
endif
lhs_(1,1,3) = (-(tmp11)) * dz1
lhs_(1,2,3) = 0.
lhs_(1,3,3) = 0.
lhs_(1,4,3) = tmp22
lhs_(1,5,3) = 0.
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * u_(2,4))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,2))
lhs_(2,2,3) = tmp22 * u_(2,4) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dz2
lhs_(2,3,3) = 0.
lhs_(2,4,3) = tmp22 * u_(2,2) * tm1
lhs_(2,5,3) = 0.
lhs_(3,1,3) = tmp22 * ((-(u_(2,3) * u_(2,4))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,3))
lhs_(3,2,3) = 0.
lhs_(3,3,3) = tmp22 * u_(2,4) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dz3
lhs_(3,4,3) = tmp22 * u_(2,3) * tm1
lhs_(3,5,3) = 0.
lhs_(4,1,3) = tmp22 * ((-(u_(2,4) * u_(2,4) * tm2)) + 0.5
&0d+00 * c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u
&_(2,4)) * tm2)) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,4))
lhs_(4,2,3) = tmp22 * ((-(c2)) * u_(2,2) * tm1)
lhs_(4,3,3) = tmp22 * ((-(c2)) * u_(2,3) * tm1)
lhs_(4,4,3) = tmp22 * (2.0d+00 - c2) * u_(2,4) * tm1 - tm
&p11 * con43 * c3 * c4 * tm1 - tmp11 * dz4
lhs_(4,5,3) = tmp22 * c2
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * (u_(2,5) * tm1)) * (u
&_(2,4) * tm1)) - tmp11 * ((-(c3c4 - c1345)) * tm3 * u_(2,2)** 2 -
&(c3c4 - c1345) * tm3 * u_(2,3)** 2 - (con43 * c3c4 - c1345) * tm3
&* u_(2,4)** 2 - c1345 * tm2 * u_(2,5))
lhs_(5,2,3) = tmp22 * ((-(c2)) * (u_(2,2) * u_(2,4)) * tm
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,2)
lhs_(5,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,4)) * tm
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,3)
lhs_(5,4,3) = tmp22 * (c1 * (u_(2,5) * tm1) - 0.50d+00 *
&c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + 3.0d+00 * u_(2,4) *
& u_(2,4)) * tm2)) - tmp11 * (con43 * c3c4 - c1345) * tm2 * u_(2,4)
lhs_(5,5,3) = tmp22 * c1 * u_(2,4) * tm1 - tmp11 * c1345
&* tm1 - tmp11 * dz5
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j,k
& - 1) - lhs_(i__0,2,1) * rhs(2,i,j,k - 1) - lhs_(i__0,3,1) * rhs(3
&,i,j,k - 1) - lhs_(i__0,4,1) * rhs(4,i,j,k - 1) - lhs_(i__0,5,1) *
& rhs(5,i,j,k - 1)
enddo
!---------------------------------------------------------------------
!
!---------------------------------------------------------------------
pivot = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot
lhs_(1,3,2) = lhs_(1,3,2) * pivot
lhs_(1,4,2) = lhs_(1,4,2) * pivot
lhs_(1,5,2) = lhs_(1,5,2) * pivot
lhs_(1,1,3) = lhs_(1,1,3) * pivot
lhs_(1,2,3) = lhs_(1,2,3) * pivot
lhs_(1,3,3) = lhs_(1,3,3) * pivot
lhs_(1,4,3) = lhs_(1,4,3) * pivot
lhs_(1,5,3) = lhs_(1,5,3) * pivot
rhs_(1) = rhs_(1) * pivot
coeff = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(1)
coeff = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(1)
coeff = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(1)
coeff = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(1)
pivot = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot
lhs_(2,4,2) = lhs_(2,4,2) * pivot
lhs_(2,5,2) = lhs_(2,5,2) * pivot
lhs_(2,1,3) = lhs_(2,1,3) * pivot
lhs_(2,2,3) = lhs_(2,2,3) * pivot
lhs_(2,3,3) = lhs_(2,3,3) * pivot
lhs_(2,4,3) = lhs_(2,4,3) * pivot
lhs_(2,5,3) = lhs_(2,5,3) * pivot
rhs_(2) = rhs_(2) * pivot
coeff = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(2)
coeff = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(2)
coeff = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(2)
coeff = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(2)
pivot = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot
lhs_(3,5,2) = lhs_(3,5,2) * pivot
lhs_(3,1,3) = lhs_(3,1,3) * pivot
lhs_(3,2,3) = lhs_(3,2,3) * pivot
lhs_(3,3,3) = lhs_(3,3,3) * pivot
lhs_(3,4,3) = lhs_(3,4,3) * pivot
lhs_(3,5,3) = lhs_(3,5,3) * pivot
rhs_(3) = rhs_(3) * pivot
coeff = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(3)
coeff = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(3)
coeff = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(3)
coeff = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(3)
pivot = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot
lhs_(4,1,3) = lhs_(4,1,3) * pivot
lhs_(4,2,3) = lhs_(4,2,3) * pivot
lhs_(4,3,3) = lhs_(4,3,3) * pivot
lhs_(4,4,3) = lhs_(4,4,3) * pivot
lhs_(4,5,3) = lhs_(4,5,3) * pivot
rhs_(4) = rhs_(4) * pivot
coeff = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(4)
coeff = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(4)
coeff = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(4)
coeff = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(4)
pivot = 1.00d0 / lhs_(5,5,2)
lhs_(5,1,3) = lhs_(5,1,3) * pivot
lhs_(5,2,3) = lhs_(5,2,3) * pivot
lhs_(5,3,3) = lhs_(5,3,3) * pivot
lhs_(5,4,3) = lhs_(5,4,3) * pivot
lhs_(5,5,3) = lhs_(5,5,3) * pivot
rhs_(5) = rhs_(5) * pivot
coeff = lhs_(1,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(5)
coeff = lhs_(2,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(5)
coeff = lhs_(3,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(5)
coeff = lhs_(4,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(5)
do i__0 = 1,5
lhs__(i__0,1,i,k,j1) = lhs_(i__0,1,3)
lhs__(i__0,2,i,k,j1) = lhs_(i__0,2,3)
lhs__(i__0,3,i,k,j1) = lhs_(i__0,3,3)
lhs__(i__0,4,i,k,j1) = lhs_(i__0,4,3)
lhs__(i__0,5,i,k,j1) = lhs_(i__0,5,3)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
u_(0,m) = u_(1,m)
u_(1,m) = u_(2,m)
enddo
enddo
do n = 1,5
lhs_(1,n,1) = 0.0d0
lhs_(1,n,2) = 0.0d0
lhs_(1,n,3) = 0.0d0
lhs_(2,n,1) = 0.0d0
lhs_(2,n,2) = 0.0d0
lhs_(2,n,3) = 0.0d0
lhs_(3,n,1) = 0.0d0
lhs_(3,n,2) = 0.0d0
lhs_(3,n,3) = 0.0d0
lhs_(4,n,1) = 0.0d0
lhs_(4,n,2) = 0.0d0
lhs_(4,n,3) = 0.0d0
lhs_(5,n,1) = 0.0d0
lhs_(5,n,2) = 0.0d0
lhs_(5,n,3) = 0.0d0
enddo
do m = 1,5
lhs_(m,m,2) = 1.0d0
enddo
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j,ksiz
&e - 1) - lhs_(i__0,2,1) * rhs(2,i,j,ksize - 1) - lhs_(i__0,3,1) *
&rhs(3,i,j,ksize - 1) - lhs_(i__0,4,1) * rhs(4,i,j,ksize - 1) - lhs
&_(i__0,5,1) * rhs(5,i,j,ksize - 1)
enddo
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
enddo
!---------------------------------------------------------------------
!
!---------------------------------------------------------------------
pivot__3 = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
rhs_(1) = rhs_(1) * pivot__3
coeff__2 = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
coeff__2 = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
coeff__2 = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
coeff__2 = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
pivot__3 = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
rhs_(2) = rhs_(2) * pivot__3
coeff__2 = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
coeff__2 = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
coeff__2 = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
coeff__2 = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
pivot__3 = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
rhs_(3) = rhs_(3) * pivot__3
coeff__2 = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
coeff__2 = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
coeff__2 = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
coeff__2 = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
pivot__3 = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
rhs_(4) = rhs_(4) * pivot__3
coeff__2 = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
coeff__2 = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
coeff__2 = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
coeff__2 = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
pivot__3 = 1.00d0 / lhs_(5,5,2)
rhs_(5) = rhs_(5) * pivot__3
coeff__2 = lhs_(1,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
coeff__2 = lhs_(2,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
coeff__2 = lhs_(3,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
coeff__2 = lhs_(4,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
k = ksize-1
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
rhsp_(m) = rhs(m,i,j,k + 1)
enddo
do k = ksize-1, 1, (-(1))
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do m = 1,5
rhs_(m) = rhs_(m) - lhs__(m,1,i,k,j1) * rhsp_(1)
rhs_(m) = rhs_(m) - lhs__(m,2,i,k,j1) * rhsp_(2)
rhs_(m) = rhs_(m) - lhs__(m,3,i,k,j1) * rhsp_(3)
rhs_(m) = rhs_(m) - lhs__(m,4,i,k,j1) * rhsp_(4)
rhs_(m) = rhs_(m) - lhs__(m,5,i,k,j1) * rhsp_(5)
enddo
do m = 1,5
rhsp_(m) = rhs_(m)
u(m,i,j,k) = u(m,i,j,k) + rhs_(m)
enddo
enddo
enddo
enddo
enddo
!DVM$ end region
return
end

View File

@@ -0,0 +1,640 @@
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! performs guaussian elimination on this cell.
!
! assumes that unpacking routines for non-first cells
! preload C' and rhs' from previous cell.
!
! assumed send happens outside this routine, but that
! c'(KMAX) and rhs'(KMAX) will be sent to next cell.
!---------------------------------------------------------------------
subroutine z_solve ()
include 'header3d.h'
double precision coeff
double precision pivot
integer i__0
integer j__1,m,n,zst
double precision coeff__2
double precision pivot__3
double precision lhs_(5,5,3),rhs_(5),rhsp_(5),u_(0:3,5)
integer i,j,k,ksize, k1
ksize = problem_size - 1
zst = ksize
!DVM$ region local(lhs__)
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), private(u_,rhs_,pivot,
!DVM$& coeff, tmp1, tmp2, tmp3, t1, t2, t3, tm1, tm2, tm3,
!DVM$& tmp11, tmp22, i__0, j__1,lhs_,n,m,pivot__3,coeff__2),
!DVM$& stage(stage_n)
!DVM$& ,ACROSS(rhs(0:0,0:0,0:0,1:0),lhs__(0:0,0:0,0:0,0:0,1:0))
do k = 1, problem_size - 1
do j = 1,problem_size - 2
do i = 1,problem_size - 2
if( k .ne. problem_size - 1) then
do m = 1,5
lhs_(m,1,3) = lhs__(m,1,i,j,k-1)
lhs_(m,2,3) = lhs__(m,2,i,j,k-1)
lhs_(m,3,3) = lhs__(m,3,i,j,k-1)
lhs_(m,4,3) = lhs__(m,4,i,j,k-1)
lhs_(m,5,3) = lhs__(m,5,i,j,k-1)
u_(0,m) = u(m,i,j,k - 1)
u_(1,m) = u(m,i,j,k)
u_(2,m) = u(m,i,j,k + 1)
enddo
tmp1 = 1.0d+00 / u_(1,1)
tmp2 = tmp1 * tmp1
tmp3 = tmp1 * tmp2
t1 = 1.0d+00 / u_(0,1)
t2 = t1 * t1
t3 = t1 * t2
tm1 = 1.0d+00 / u_(2,1)
tm2 = tm1 * tm1
tm3 = tm1 * tm2
tmp11 = dt * tz1
tmp22 = dt * tz2
lhs_(1,1,1) = (-(tmp11)) * dz1
lhs_(1,2,1) = 0.
lhs_(1,3,1) = 0.
lhs_(1,4,1) = (-(tmp22))
lhs_(1,5,1) = 0.
lhs_(2,1,1) = (-(tmp22)) * ((-(u_(0,2) * u_(0,4))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,2))
lhs_(2,2,1) = (-(tmp22)) * u_(0,4) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dz2
lhs_(2,3,1) = 0.
lhs_(2,4,1) = (-(tmp22)) * u_(0,2) * t1
lhs_(2,5,1) = 0.
lhs_(3,1,1) = (-(tmp22)) * ((-(u_(0,3) * u_(0,4))) * t2)
&- tmp11 * ((-(c3c4)) * t2 * u_(0,3))
lhs_(3,2,1) = 0.
lhs_(3,3,1) = (-(tmp22)) * u_(0,4) * t1 - tmp11 * c3c4 *
&t1 - tmp11 * dz3
lhs_(3,4,1) = (-(tmp22)) * u_(0,3) * t1
lhs_(3,5,1) = 0.
lhs_(4,1,1) = (-(tmp22)) * ((-(u_(0,4) * u_(0,4) * t2)) +
& 0.50d+00 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + u_(0,4)
& * u_(0,4)) * t2)) - tmp11 * ((-(con43)) * c3c4 * t2 * u_(0,4))
lhs_(4,2,1) = (-(tmp22)) * ((-(c2)) * u_(0,2) * t1)
lhs_(4,3,1) = (-(tmp22)) * ((-(c2)) * u_(0,3) * t1)
lhs_(4,4,1) = (-(tmp22)) * (2.0d+00 - c2) * u_(0,4) * t1
&- tmp11 * con43 * c3 * c4 * t1 - tmp11 * dz4
lhs_(4,5,1) = (-(tmp22)) * c2
lhs_(5,1,1) = (-(tmp22)) * ((c2 * (u_(0,2) * u_(0,2) + u_
&(0,3) * u_(0,3) + u_(0,4) * u_(0,4)) * t2 - c1 * (u_(0,5) * t1)) *
& (u_(0,4) * t1)) - tmp11 * ((-(c3c4 - c1345)) * t3 * u_(0,2)** 2 -
& (c3c4 - c1345) * t3 * u_(0,3)** 2 - (con43 * c3c4 - c1345) * t3 *
& u_(0,4)** 2 - c1345 * t2 * u_(0,5))
lhs_(5,2,1) = (-(tmp22)) * ((-(c2)) * (u_(0,2) * u_(0,4))
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,2)
lhs_(5,3,1) = (-(tmp22)) * ((-(c2)) * (u_(0,3) * u_(0,4))
& * t2) - tmp11 * (c3c4 - c1345) * t2 * u_(0,3)
lhs_(5,4,1) = (-(tmp22)) * (c1 * (u_(0,5) * t1) - 0.50d+0
&0 * c2 * ((u_(0,2) * u_(0,2) + u_(0,3) * u_(0,3) + 3.0d+00 * u_(0,
&4) * u_(0,4)) * t2)) - tmp11 * (con43 * c3c4 - c1345) * t2 * u_(0,
&4)
lhs_(5,5,1) = (-(tmp22)) * c1 * u_(0,4) * t1 - tmp11 * c1
&345 * t1 - tmp11 * dz5
lhs_(1,1,2) = 1.0d+00 + tmp11 * 2.0d+00 * dz1
lhs_(1,2,2) = 0.
lhs_(1,3,2) = 0.
lhs_(1,4,2) = 0.
lhs_(1,5,2) = 0.
lhs_(2,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&2))
lhs_(2,2,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dz2
lhs_(2,3,2) = 0.
lhs_(2,4,2) = 0.
lhs_(2,5,2) = 0.
lhs_(3,1,2) = tmp11 * 2.0d+00 * ((-(c3c4)) * tmp2 * u_(1,
&3))
lhs_(3,2,2) = 0.
lhs_(3,3,2) = 1.0d+00 + tmp11 * 2.0d+00 * c3c4 * tmp1 + t
&mp11 * 2.0d+00 * dz3
lhs_(3,4,2) = 0.
lhs_(3,5,2) = 0.
lhs_(4,1,2) = tmp11 * 2.0d+00 * ((-(con43)) * c3c4 * tmp2
& * u_(1,4))
lhs_(4,2,2) = 0.
lhs_(4,3,2) = 0.
lhs_(4,4,2) = 1.0d+00 + tmp11 * 2.0d+00 * con43 * c3 * c4
& * tmp1 + tmp11 * 2.0d+00 * dz4
lhs_(4,5,2) = 0.
lhs_(5,1,2) = tmp11 * 2.0d+00 * ((-(c3c4 - c1345)) * tmp3
& * u_(1,2)** 2 - (c3c4 - c1345) * tmp3 * u_(1,3)** 2 - (con43 * c3
&c4 - c1345) * tmp3 * u_(1,4)** 2 - c1345 * tmp2 * u_(1,5))
lhs_(5,2,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,2)
lhs_(5,3,2) = tmp11 * 2.0d+00 * (c3c4 - c1345) * tmp2 * u
&_(1,3)
lhs_(5,4,2) = tmp11 * 2.0d+00 * (con43 * c3c4 - c1345) *
&tmp2 * u_(1,4)
lhs_(5,5,2) = 1.0d+00 + tmp11 * 2.0d+00 * c1345 * tmp1 +
&tmp11 * 2.0d+00 * dz5
if (k .ne. 1) then
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs
&_(1,j__1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,
&j__1,3) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1
&,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs
&_(1,j__1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,
&j__1,3) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1
&,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs
&_(1,j__1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,
&j__1,3) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1
&,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs
&_(1,j__1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,
&j__1,3) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1
&,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs
&_(1,j__1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,
&j__1,3) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1
&,3)
enddo
endif
lhs_(1,1,3) = (-(tmp11)) * dz1
lhs_(1,2,3) = 0.
lhs_(1,3,3) = 0.
lhs_(1,4,3) = tmp22
lhs_(1,5,3) = 0.
lhs_(2,1,3) = tmp22 * ((-(u_(2,2) * u_(2,4))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,2))
lhs_(2,2,3) = tmp22 * u_(2,4) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dz2
lhs_(2,3,3) = 0.
lhs_(2,4,3) = tmp22 * u_(2,2) * tm1
lhs_(2,5,3) = 0.
lhs_(3,1,3) = tmp22 * ((-(u_(2,3) * u_(2,4))) * tm2) - tm
&p11 * ((-(c3c4)) * tm2 * u_(2,3))
lhs_(3,2,3) = 0.
lhs_(3,3,3) = tmp22 * u_(2,4) * tm1 - tmp11 * c3c4 * tm1
&- tmp11 * dz3
lhs_(3,4,3) = tmp22 * u_(2,3) * tm1
lhs_(3,5,3) = 0.
lhs_(4,1,3) = tmp22 * ((-(u_(2,4) * u_(2,4) * tm2)) + 0.5
&0d+00 * c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + u_(2,4) * u
&_(2,4)) * tm2)) - tmp11 * ((-(con43)) * c3c4 * tm2 * u_(2,4))
lhs_(4,2,3) = tmp22 * ((-(c2)) * u_(2,2) * tm1)
lhs_(4,3,3) = tmp22 * ((-(c2)) * u_(2,3) * tm1)
lhs_(4,4,3) = tmp22 * (2.0d+00 - c2) * u_(2,4) * tm1 - tm
&p11 * con43 * c3 * c4 * tm1 - tmp11 * dz4
lhs_(4,5,3) = tmp22 * c2
lhs_(5,1,3) = tmp22 * ((c2 * (u_(2,2) * u_(2,2) + u_(2,3)
& * u_(2,3) + u_(2,4) * u_(2,4)) * tm2 - c1 * (u_(2,5) * tm1)) * (u
&_(2,4) * tm1)) - tmp11 * ((-(c3c4 - c1345)) * tm3 * u_(2,2)** 2 -
&(c3c4 - c1345) * tm3 * u_(2,3)** 2 - (con43 * c3c4 - c1345) * tm3
&* u_(2,4)** 2 - c1345 * tm2 * u_(2,5))
lhs_(5,2,3) = tmp22 * ((-(c2)) * (u_(2,2) * u_(2,4)) * tm
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,2)
lhs_(5,3,3) = tmp22 * ((-(c2)) * (u_(2,3) * u_(2,4)) * tm
&2) - tmp11 * (c3c4 - c1345) * tm2 * u_(2,3)
lhs_(5,4,3) = tmp22 * (c1 * (u_(2,5) * tm1) - 0.50d+00 *
&c2 * ((u_(2,2) * u_(2,2) + u_(2,3) * u_(2,3) + 3.0d+00 * u_(2,4) *
& u_(2,4)) * tm2)) - tmp11 * (con43 * c3c4 - c1345) * tm2 * u_(2,4)
lhs_(5,5,3) = tmp22 * c1 * u_(2,4) * tm1 - tmp11 * c1345
&* tm1 - tmp11 * dz5
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j,k
& - 1) - lhs_(i__0,2,1) * rhs(2,i,j,k - 1) - lhs_(i__0,3,1) * rhs(3
&,i,j,k - 1) - lhs_(i__0,4,1) * rhs(4,i,j,k - 1) - lhs_(i__0,5,1) *
& rhs(5,i,j,k - 1)
enddo
!---------------------------------------------------------------------
!
!---------------------------------------------------------------------
pivot = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot
lhs_(1,3,2) = lhs_(1,3,2) * pivot
lhs_(1,4,2) = lhs_(1,4,2) * pivot
lhs_(1,5,2) = lhs_(1,5,2) * pivot
lhs_(1,1,3) = lhs_(1,1,3) * pivot
lhs_(1,2,3) = lhs_(1,2,3) * pivot
lhs_(1,3,3) = lhs_(1,3,3) * pivot
lhs_(1,4,3) = lhs_(1,4,3) * pivot
lhs_(1,5,3) = lhs_(1,5,3) * pivot
rhs_(1) = rhs_(1) * pivot
coeff = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(1,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(1,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(1,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(1,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(1,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(1,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(1)
coeff = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(1,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(1,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(1,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(1,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(1,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(1,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(1)
coeff = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(1,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(1,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(1,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(1,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(1,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(1,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(1)
coeff = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(1,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(1,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(1,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(1,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(1,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(1,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(1)
pivot = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot
lhs_(2,4,2) = lhs_(2,4,2) * pivot
lhs_(2,5,2) = lhs_(2,5,2) * pivot
lhs_(2,1,3) = lhs_(2,1,3) * pivot
lhs_(2,2,3) = lhs_(2,2,3) * pivot
lhs_(2,3,3) = lhs_(2,3,3) * pivot
lhs_(2,4,3) = lhs_(2,4,3) * pivot
lhs_(2,5,3) = lhs_(2,5,3) * pivot
rhs_(2) = rhs_(2) * pivot
coeff = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(2,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(2,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(2,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(2,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(2,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(2,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(2)
coeff = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(2,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(2,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(2,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(2,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(2,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(2,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(2)
coeff = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(2,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(2,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(2,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(2,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(2,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(2,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(2)
coeff = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(2,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(2,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(2,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(2,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(2,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(2,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(2)
pivot = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot
lhs_(3,5,2) = lhs_(3,5,2) * pivot
lhs_(3,1,3) = lhs_(3,1,3) * pivot
lhs_(3,2,3) = lhs_(3,2,3) * pivot
lhs_(3,3,3) = lhs_(3,3,3) * pivot
lhs_(3,4,3) = lhs_(3,4,3) * pivot
lhs_(3,5,3) = lhs_(3,5,3) * pivot
rhs_(3) = rhs_(3) * pivot
coeff = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(3,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(3,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(3,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(3,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(3,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(3,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(3)
coeff = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(3,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(3,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(3,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(3,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(3,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(3,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(3)
coeff = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff * lhs_(3,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(3,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(3,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(3,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(3,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(3,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(3)
coeff = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(3,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(3,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(3,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(3,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(3,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(3,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(3)
pivot = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot
lhs_(4,1,3) = lhs_(4,1,3) * pivot
lhs_(4,2,3) = lhs_(4,2,3) * pivot
lhs_(4,3,3) = lhs_(4,3,3) * pivot
lhs_(4,4,3) = lhs_(4,4,3) * pivot
lhs_(4,5,3) = lhs_(4,5,3) * pivot
rhs_(4) = rhs_(4) * pivot
coeff = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff * lhs_(4,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(4,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(4,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(4,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(4,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(4,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(4)
coeff = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff * lhs_(4,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(4,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(4,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(4,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(4,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(4,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(4)
coeff = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff * lhs_(4,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(4,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(4,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(4,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(4,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(4,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(4)
coeff = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff * lhs_(4,5,2)
lhs_(5,1,3) = lhs_(5,1,3) - coeff * lhs_(4,1,3)
lhs_(5,2,3) = lhs_(5,2,3) - coeff * lhs_(4,2,3)
lhs_(5,3,3) = lhs_(5,3,3) - coeff * lhs_(4,3,3)
lhs_(5,4,3) = lhs_(5,4,3) - coeff * lhs_(4,4,3)
lhs_(5,5,3) = lhs_(5,5,3) - coeff * lhs_(4,5,3)
rhs_(5) = rhs_(5) - coeff * rhs_(4)
pivot = 1.00d0 / lhs_(5,5,2)
lhs_(5,1,3) = lhs_(5,1,3) * pivot
lhs_(5,2,3) = lhs_(5,2,3) * pivot
lhs_(5,3,3) = lhs_(5,3,3) * pivot
lhs_(5,4,3) = lhs_(5,4,3) * pivot
lhs_(5,5,3) = lhs_(5,5,3) * pivot
rhs_(5) = rhs_(5) * pivot
coeff = lhs_(1,5,2)
lhs_(1,1,3) = lhs_(1,1,3) - coeff * lhs_(5,1,3)
lhs_(1,2,3) = lhs_(1,2,3) - coeff * lhs_(5,2,3)
lhs_(1,3,3) = lhs_(1,3,3) - coeff * lhs_(5,3,3)
lhs_(1,4,3) = lhs_(1,4,3) - coeff * lhs_(5,4,3)
lhs_(1,5,3) = lhs_(1,5,3) - coeff * lhs_(5,5,3)
rhs_(1) = rhs_(1) - coeff * rhs_(5)
coeff = lhs_(2,5,2)
lhs_(2,1,3) = lhs_(2,1,3) - coeff * lhs_(5,1,3)
lhs_(2,2,3) = lhs_(2,2,3) - coeff * lhs_(5,2,3)
lhs_(2,3,3) = lhs_(2,3,3) - coeff * lhs_(5,3,3)
lhs_(2,4,3) = lhs_(2,4,3) - coeff * lhs_(5,4,3)
lhs_(2,5,3) = lhs_(2,5,3) - coeff * lhs_(5,5,3)
rhs_(2) = rhs_(2) - coeff * rhs_(5)
coeff = lhs_(3,5,2)
lhs_(3,1,3) = lhs_(3,1,3) - coeff * lhs_(5,1,3)
lhs_(3,2,3) = lhs_(3,2,3) - coeff * lhs_(5,2,3)
lhs_(3,3,3) = lhs_(3,3,3) - coeff * lhs_(5,3,3)
lhs_(3,4,3) = lhs_(3,4,3) - coeff * lhs_(5,4,3)
lhs_(3,5,3) = lhs_(3,5,3) - coeff * lhs_(5,5,3)
rhs_(3) = rhs_(3) - coeff * rhs_(5)
coeff = lhs_(4,5,2)
lhs_(4,1,3) = lhs_(4,1,3) - coeff * lhs_(5,1,3)
lhs_(4,2,3) = lhs_(4,2,3) - coeff * lhs_(5,2,3)
lhs_(4,3,3) = lhs_(4,3,3) - coeff * lhs_(5,3,3)
lhs_(4,4,3) = lhs_(4,4,3) - coeff * lhs_(5,4,3)
lhs_(4,5,3) = lhs_(4,5,3) - coeff * lhs_(5,5,3)
rhs_(4) = rhs_(4) - coeff * rhs_(5)
do i__0 = 1,5
lhs__(i__0,1,i,j,k) = lhs_(i__0,1,3)
lhs__(i__0,2,i,j,k) = lhs_(i__0,2,3)
lhs__(i__0,3,i,j,k) = lhs_(i__0,3,3)
lhs__(i__0,4,i,j,k) = lhs_(i__0,4,3)
lhs__(i__0,5,i,j,k) = lhs_(i__0,5,3)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
else !! of big IF(k .ne. lastIter)
do n = 1,5
lhs_(1,n,1) = 0.0d0
lhs_(1,n,2) = 0.0d0
lhs_(1,n,3) = 0.0d0
lhs_(2,n,1) = 0.0d0
lhs_(2,n,2) = 0.0d0
lhs_(2,n,3) = 0.0d0
lhs_(3,n,1) = 0.0d0
lhs_(3,n,2) = 0.0d0
lhs_(3,n,3) = 0.0d0
lhs_(4,n,1) = 0.0d0
lhs_(4,n,2) = 0.0d0
lhs_(4,n,3) = 0.0d0
lhs_(5,n,1) = 0.0d0
lhs_(5,n,2) = 0.0d0
lhs_(5,n,3) = 0.0d0
enddo
do m = 1,5
lhs_(m,m,2) = 1.0d0
enddo
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
enddo
do i__0 = 1,5
rhs_(i__0) = rhs_(i__0) - lhs_(i__0,1,1) * rhs(1,i,j,ksiz
&e - 1) - lhs_(i__0,2,1) * rhs(2,i,j,ksize - 1) - lhs_(i__0,3,1) *
&rhs(3,i,j,ksize - 1) - lhs_(i__0,4,1) * rhs(4,i,j,ksize - 1) - lhs
&_(i__0,5,1) * rhs(5,i,j,ksize - 1)
enddo
do j__1 = 1,5
lhs_(1,j__1,2) = lhs_(1,j__1,2) - lhs_(1,1,1) * lhs_(1,j_
&_1,3) - lhs_(1,2,1) * lhs_(2,j__1,3) - lhs_(1,3,1) * lhs_(3,j__1,3
&) - lhs_(1,4,1) * lhs_(4,j__1,3) - lhs_(1,5,1) * lhs_(5,j__1,3)
lhs_(2,j__1,2) = lhs_(2,j__1,2) - lhs_(2,1,1) * lhs_(1,j_
&_1,3) - lhs_(2,2,1) * lhs_(2,j__1,3) - lhs_(2,3,1) * lhs_(3,j__1,3
&) - lhs_(2,4,1) * lhs_(4,j__1,3) - lhs_(2,5,1) * lhs_(5,j__1,3)
lhs_(3,j__1,2) = lhs_(3,j__1,2) - lhs_(3,1,1) * lhs_(1,j_
&_1,3) - lhs_(3,2,1) * lhs_(2,j__1,3) - lhs_(3,3,1) * lhs_(3,j__1,3
&) - lhs_(3,4,1) * lhs_(4,j__1,3) - lhs_(3,5,1) * lhs_(5,j__1,3)
lhs_(4,j__1,2) = lhs_(4,j__1,2) - lhs_(4,1,1) * lhs_(1,j_
&_1,3) - lhs_(4,2,1) * lhs_(2,j__1,3) - lhs_(4,3,1) * lhs_(3,j__1,3
&) - lhs_(4,4,1) * lhs_(4,j__1,3) - lhs_(4,5,1) * lhs_(5,j__1,3)
lhs_(5,j__1,2) = lhs_(5,j__1,2) - lhs_(5,1,1) * lhs_(1,j_
&_1,3) - lhs_(5,2,1) * lhs_(2,j__1,3) - lhs_(5,3,1) * lhs_(3,j__1,3
&) - lhs_(5,4,1) * lhs_(4,j__1,3) - lhs_(5,5,1) * lhs_(5,j__1,3)
enddo
!---------------------------------------------------------------------
!
!---------------------------------------------------------------------
pivot__3 = 1.00d0 / lhs_(1,1,2)
lhs_(1,2,2) = lhs_(1,2,2) * pivot__3
lhs_(1,3,2) = lhs_(1,3,2) * pivot__3
lhs_(1,4,2) = lhs_(1,4,2) * pivot__3
lhs_(1,5,2) = lhs_(1,5,2) * pivot__3
rhs_(1) = rhs_(1) * pivot__3
coeff__2 = lhs_(2,1,2)
lhs_(2,2,2) = lhs_(2,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(2,3,2) = lhs_(2,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(1)
coeff__2 = lhs_(3,1,2)
lhs_(3,2,2) = lhs_(3,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(1)
coeff__2 = lhs_(4,1,2)
lhs_(4,2,2) = lhs_(4,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(1)
coeff__2 = lhs_(5,1,2)
lhs_(5,2,2) = lhs_(5,2,2) - coeff__2 * lhs_(1,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(1,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(1,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(1,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(1)
pivot__3 = 1.00d0 / lhs_(2,2,2)
lhs_(2,3,2) = lhs_(2,3,2) * pivot__3
lhs_(2,4,2) = lhs_(2,4,2) * pivot__3
lhs_(2,5,2) = lhs_(2,5,2) * pivot__3
rhs_(2) = rhs_(2) * pivot__3
coeff__2 = lhs_(1,2,2)
lhs_(1,3,2) = lhs_(1,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(2)
coeff__2 = lhs_(3,2,2)
lhs_(3,3,2) = lhs_(3,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(3,4,2) = lhs_(3,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(2)
coeff__2 = lhs_(4,2,2)
lhs_(4,3,2) = lhs_(4,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(2)
coeff__2 = lhs_(5,2,2)
lhs_(5,3,2) = lhs_(5,3,2) - coeff__2 * lhs_(2,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(2,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(2,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(2)
pivot__3 = 1.00d0 / lhs_(3,3,2)
lhs_(3,4,2) = lhs_(3,4,2) * pivot__3
lhs_(3,5,2) = lhs_(3,5,2) * pivot__3
rhs_(3) = rhs_(3) * pivot__3
coeff__2 = lhs_(1,3,2)
lhs_(1,4,2) = lhs_(1,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(3)
coeff__2 = lhs_(2,3,2)
lhs_(2,4,2) = lhs_(2,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(3)
coeff__2 = lhs_(4,3,2)
lhs_(4,4,2) = lhs_(4,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(4,5,2) = lhs_(4,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(3)
coeff__2 = lhs_(5,3,2)
lhs_(5,4,2) = lhs_(5,4,2) - coeff__2 * lhs_(3,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(3,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(3)
pivot__3 = 1.00d0 / lhs_(4,4,2)
lhs_(4,5,2) = lhs_(4,5,2) * pivot__3
rhs_(4) = rhs_(4) * pivot__3
coeff__2 = lhs_(1,4,2)
lhs_(1,5,2) = lhs_(1,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(4)
coeff__2 = lhs_(2,4,2)
lhs_(2,5,2) = lhs_(2,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(4)
coeff__2 = lhs_(3,4,2)
lhs_(3,5,2) = lhs_(3,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(4)
coeff__2 = lhs_(5,4,2)
lhs_(5,5,2) = lhs_(5,5,2) - coeff__2 * lhs_(4,5,2)
rhs_(5) = rhs_(5) - coeff__2 * rhs_(4)
pivot__3 = 1.00d0 / lhs_(5,5,2)
rhs_(5) = rhs_(5) * pivot__3
coeff__2 = lhs_(1,5,2)
rhs_(1) = rhs_(1) - coeff__2 * rhs_(5)
coeff__2 = lhs_(2,5,2)
rhs_(2) = rhs_(2) - coeff__2 * rhs_(5)
coeff__2 = lhs_(3,5,2)
rhs_(3) = rhs_(3) - coeff__2 * rhs_(5)
coeff__2 = lhs_(4,5,2)
rhs_(4) = rhs_(4) - coeff__2 * rhs_(5)
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
enddo
endif
enddo
enddo
enddo
!DVM$ PARALLEL (k,j,i) ON rhs(*,i,j,k), private(rhs_,rhsp_,m)
!DVM$& ,ACROSS(rhs(0:0,0:0,0:0,0:1)),stage(stage_n)
do k = problem_size-2, 1, (-(1))
do j = 1,problem_size - 2
do i = 1,problem_size - 2
do m = 1,5
rhs_(m) = rhs(m,i,j,k)
rhsp_(m) = rhs(m,i,j,k + 1)
enddo
do m = 1,5
rhs_(m) = rhs_(m) - lhs__(m,1,i,j,k) * rhsp_(1)
rhs_(m) = rhs_(m) - lhs__(m,2,i,j,k) * rhsp_(2)
rhs_(m) = rhs_(m) - lhs__(m,3,i,j,k) * rhsp_(3)
rhs_(m) = rhs_(m) - lhs__(m,4,i,j,k) * rhsp_(4)
rhs_(m) = rhs_(m) - lhs__(m,5,i,j,k) * rhsp_(5)
enddo
do m = 1,5
rhs(m,i,j,k) = rhs_(m)
u(m,i,j,k) = u(m,i,j,k) + rhs_(m)
enddo
enddo
enddo
enddo
!DVM$ end region
return
end

View File

@@ -0,0 +1,21 @@
SHELL=/bin/sh
BENCHMARK=cg
BENCHMARKU=CG
include ../config/make.def
include ../sys/make.common
SOURCES = cg.fdv
OBJS = ${SOURCES:.fdv=.o}
${PROGRAM}: config $(OBJS)
${FLINK} -o ${PROGRAM} ${OBJS}
%.o: %.fdv npbparams.h globals.h
${F77} ${FFLAGS} -dvmIrregAnalysis -c -o $@ $<
clean:
rm -f npbparams.h
rm -f *.o *~
rm -f *.cu *.cuf *.c

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,105 @@
include 'npbparams.h'
c---------------------------------------------------------------------
c Note: please observe that in the routine conj_grad three
c implementations of the sparse matrix-vector multiply have
c been supplied. The default matrix-vector multiply is not
c loop unrolled. The alternate implementations are unrolled
c to a depth of 2 and unrolled to a depth of 8. Please
c experiment with these to find the fastest for your particular
c architecture. If reporting timing results, any of these three may
c be used without penalty.
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c Class specific parameters:
c It appears here for reference only.
c These are their values, however, this info is imported in the npbparams.h
c include file, which is written by the sys/setparams.c program.
c---------------------------------------------------------------------
C----------
C Class S:
C----------
CC parameter( na=1400,
CC > nonzer=7,
CC > shift=10.,
CC > niter=15,
CC > rcond=1.0d-1 )
C----------
C Class W:
C----------
CC parameter( na=7000,
CC > nonzer=8,
CC > shift=12.,
CC > niter=15,
CC > rcond=1.0d-1 )
C----------
C Class A:
C----------
CC parameter( na=14000,
CC > nonzer=11,
CC > shift=20.,
CC > niter=15,
CC > rcond=1.0d-1 )
C----------
C Class B:
C----------
CC parameter( na=75000,
CC > nonzer=13,
CC > shift=60.,
CC > niter=75,
CC > rcond=1.0d-1 )
C----------
C Class C:
C----------
CC parameter( na=150000,
CC > nonzer=15,
CC > shift=110.,
CC > niter=75,
CC > rcond=1.0d-1 )
C----------
C Class D:
C----------
CC parameter( na=1500000,
CC > nonzer=21,
CC > shift=500.,
CC > niter=100,
CC > rcond=1.0d-1 )
C----------
C Class E:
C----------
CC parameter( na=9000000,
CC > nonzer=26,
CC > shift=1500.,
CC > niter=100,
CC > rcond=1.0d-1 )
integer nz, naz
parameter( nz = na*(nonzer+1)*(nonzer+1) )
parameter( naz = na*(nonzer+1) )
common / partit_size / naa, nzz,
> firstrow,
> lastrow,
> firstcol,
> lastcol
integer naa, nzz,
> firstrow,
> lastrow,
> firstcol,
> lastcol
common /urando/ amult, tran
double precision amult, tran
external timer_read
double precision timer_read
integer T_init, T_bench, T_conj_grad, T_last
parameter (T_init=1, T_bench=2, T_conj_grad=3, T_last=3)
logical timeron
common /timers/ timeron

View File

@@ -0,0 +1,12 @@
@echo off
set CLASS=%1
set OPT=%2
CALL ..\sys\setparams CG %CLASS%
CALL %F77% %OPT% cg 1>out_%CLASS%.txt 2>err_%CLASS%.txt
if exist cg.exe (
copy cg.exe %BIN%\cg.%CLASS%.x.exe
del cg.exe
)

View File

@@ -0,0 +1,111 @@
subroutine print_results(name, class, n1, n2, n3, niter,
> t, mops, optype, verified, npbversion,
> compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7)
implicit none
character name*(*)
character class*1
integer n1, n2, n3, niter, j
double precision t, mops
character optype*24, size*15
logical verified
character*(*) npbversion, compiletime,
> cs1, cs2, cs3, cs4, cs5, cs6, cs7
write (*, 2) name
2 format(//, ' ', A, ' Benchmark Completed.')
write (*, 3) Class
3 format(' Class = ', 12x, a12)
c If this is not a grid-based problem (EP, FT, CG), then
c we only print n1, which contains some measure of the
c problem size. In that case, n2 and n3 are both zero.
c Otherwise, we print the grid size n1xn2xn3
if ((n2 .eq. 0) .and. (n3 .eq. 0)) then
if (name(1:2) .eq. 'EP') then
write(size, '(f15.0)' ) 2.d0**n1
j = 15
if (size(j:j) .eq. '.') then
size(j:j) = ' '
j = j - 1
endif
write (*,42) size(1:j)
42 format(' Size = ',9x, a15)
else
write (*,44) n1
44 format(' Size = ',12x, i12)
endif
else
write (*, 4) n1,n2,n3
4 format(' Size = ',9x, i4,'x',i4,'x',i4)
endif
write (*, 5) niter
5 format(' Iterations = ', 12x, i12)
write (*, 6) t
6 format(' Time in seconds = ',12x, f12.2)
write (*,9) mops
9 format(' Mop/s total = ',12x, f12.2)
write(*, 11) optype
11 format(' Operation type = ', a24)
if (verified) then
write(*,12) ' SUCCESSFUL'
else
write(*,12) 'UNSUCCESSFUL'
endif
12 format(' Verification = ', 12x, a)
write(*,13) npbversion
13 format(' Version = ', 12x, a12)
write(*,14) compiletime
14 format(' Compile date = ', 12x, a12)
write (*,121) cs1
121 format(/, ' Compile options:', /,
> ' F77 = ', A)
write (*,122) cs2
122 format(' FLINK = ', A)
write (*,123) cs3
123 format(' F_LIB = ', A)
write (*,124) cs4
124 format(' F_INC = ', A)
write (*,125) cs5
125 format(' FFLAGS = ', A)
write (*,126) cs6
126 format(' FLINKFLAGS = ', A)
write(*, 127) cs7
127 format(' RAND = ', A)
write (*,130)
130 format(//' Please send all errors/feedbacks to:'//
> ' NPB Development Team'/
> ' npb@nas.nasa.gov'//)
c 130 format(//' Please send the results of this run to:'//
c > ' NPB Development Team '/
c > ' Internet: npb@nas.nasa.gov'/
c > ' '/
c > ' If email is not available, send this to:'//
c > ' MS T27A-1'/
c > ' NASA Ames Research Center'/
c > ' Moffett Field, CA 94035-1000'//
c > ' Fax: 650-604-3957'//)
return
end

View File

@@ -0,0 +1,137 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
double precision function randlc (x, a)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c
c This routine returns a uniform pseudorandom double precision number in the
c range (0, 1) by using the linear congruential generator
c
c x_{k+1} = a x_k (mod 2^46)
c
c where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers
c before repeating. The argument A is the same as 'a' in the above formula,
c and X is the same as x_0. A and X must be odd double precision integers
c in the range (1, 2^46). The returned value RANDLC is normalized to be
c between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain
c the new seed x_1, so that subsequent calls to RANDLC using the same
c arguments will generate a continuous sequence.
c
c This routine should produce the same results on any computer with at least
c 48 mantissa bits in double precision floating point data. On 64 bit
c systems, double precision should be disabled.
c
c David H. Bailey October 26, 1990
c
c---------------------------------------------------------------------
implicit none
double precision r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
> t46 = t23 ** 2)
c---------------------------------------------------------------------
c Break A into two parts such that A = 2^23 * A1 + A2.
c---------------------------------------------------------------------
t1 = r23 * a
a1 = int (t1)
a2 = a - t23 * a1
c---------------------------------------------------------------------
c Break X into two parts such that X = 2^23 * X1 + X2, compute
c Z = A1 * X2 + A2 * X1 (mod 2^23), and then
c X = 2^23 * Z + A2 * X2 (mod 2^46).
c---------------------------------------------------------------------
t1 = r23 * x
x1 = int (t1)
x2 = x - t23 * x1
t1 = a1 * x2 + a2 * x1
t2 = int (r23 * t1)
z = t1 - t23 * t2
t3 = t23 * z + a2 * x2
t4 = int (r46 * t3)
x = t3 - t46 * t4
randlc = r46 * x
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine vranlc (n, x, a, y)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c
c This routine generates N uniform pseudorandom double precision numbers in
c the range (0, 1) by using the linear congruential generator
c
c x_{k+1} = a x_k (mod 2^46)
c
c where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers
c before repeating. The argument A is the same as 'a' in the above formula,
c and X is the same as x_0. A and X must be odd double precision integers
c in the range (1, 2^46). The N results are placed in Y and are normalized
c to be between 0 and 1. X is updated to contain the new seed, so that
c subsequent calls to VRANLC using the same arguments will generate a
c continuous sequence. If N is zero, only initialization is performed, and
c the variables X, A and Y are ignored.
c
c This routine is the standard version designed for scalar or RISC systems.
c However, it should produce the same results on any single processor
c computer with at least 48 mantissa bits in double precision floating point
c data. On 64 bit systems, double precision should be disabled.
c
c---------------------------------------------------------------------
implicit none
integer i,n
double precision y,r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z
dimension y(*)
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
> t46 = t23 ** 2)
c---------------------------------------------------------------------
c Break A into two parts such that A = 2^23 * A1 + A2.
c---------------------------------------------------------------------
t1 = r23 * a
a1 = int (t1)
a2 = a - t23 * a1
c---------------------------------------------------------------------
c Generate N results. This loop is not vectorizable.
c---------------------------------------------------------------------
do i = 1, n
c---------------------------------------------------------------------
c Break X into two parts such that X = 2^23 * X1 + X2, compute
c Z = A1 * X2 + A2 * X1 (mod 2^23), and then
c X = 2^23 * Z + A2 * X2 (mod 2^46).
c---------------------------------------------------------------------
t1 = r23 * x
x1 = int (t1)
x2 = x - t23 * x1
t1 = a1 * x2 + a2 * x1
t2 = int (r23 * t1)
z = t1 - t23 * t2
t3 = t23 * z + a2 * x2
t4 = int (r46 * t3)
x = t3 - t46 * t4
y(i) = r46 * x
enddo
return
end

View File

@@ -0,0 +1,108 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine timer_clear(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
elapsed(n) = 0.0
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine timer_start(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
external elapsed_time
double precision elapsed_time
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
start(n) = elapsed_time()
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine timer_stop(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
external elapsed_time
double precision elapsed_time
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
double precision t, now
now = elapsed_time()
t = now - start(n)
elapsed(n) = elapsed(n) + t
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
double precision function timer_read(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
timer_read = elapsed(n)
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
double precision function elapsed_time()
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
double precision dvtime
c This function must measure wall clock time, not CPU time.
c Since there is no portable timer in Fortran (77)
c we call a routine compiled in C (though the C source may have
c to be tweaked).
! call wtime(t)
c The following is not ok for "official" results because it reports
c CPU time not wall clock time. It may be useful for developing/testing
c on timeshared Crays, though.
c call second(t)
elapsed_time = dvtime()
return
end

View File

@@ -0,0 +1,21 @@
SHELL=/bin/sh
BENCHMARK=ep
BENCHMARKU=EP
include ../config/make.def
include ../sys/make.common
SOURCES = ep.fdv
OBJS = ${SOURCES:.fdv=.o}
${PROGRAM}: config $(OBJS)
${FLINK} -o ${PROGRAM} ${OBJS}
%.o: %.fdv npbparams.h
${F77} ${FFLAGS} -c -o $@ $<
clean:
rm -f npbparams.h
rm -f *.o *~
rm -f *.cu *.cuf *.c *.f

View File

@@ -0,0 +1,565 @@
!-------------------------------------------------------------------------!
! !
! N A S P A R A L L E L B E N C H M A R K S 2.3 !
! !
! D V M V E R S I O N S !
! !
! E P !
! !
!-------------------------------------------------------------------------!
! !
! This benchmark is DVM version of the NPB EP code. !
! !
! Permission to use, copy, distribute and modify this software !
! for any purpose with or without fee is hereby granted. We !
! request, however, that all derived work reference the NAS !
! Parallel Benchmarks 2.3. This software is provided "as is" !
! without express or implied warranty. !
! !
! Information on NPB 2.3, including the technical report, the !
! original specifications, source code, results and information !
! on how to submit new results, is available at: !
! !
! http://www.nas.nasa.gov/NAS/NPB/ !
! !
! Send comments or suggestions to npb@nas.nasa.gov !
! Send bug reports to npb-bugs@nas.nasa.gov !
! !
! NAS Parallel Benchmarks Group !
! NASA Ames Research Center !
! Mail Stop: T27A-1 !
! Moffett Field, CA 94035-1000 !
! !
! E-mail: npb@nas.nasa.gov !
! Fax: (415) 604-3957 !
! !
!-------------------------------------------------------------------------!
c---------------------------------------------------------------------
c
c Author: P. O. Frederickson
c D. H. Bailey
c A. C. Woo
c---------------------------------------------------------------------
c---------------------------------------------------------------------
program epdv
c---------------------------------------------------------------------
C
c This is the serial version of the APP Benchmark 1,
c the "embarassingly parallel" benchmark.
c
c
c M is the Log_2 of the number of complex pairs of uniform (0, 1) random
c numbers. MK is the Log_2 of the size of each batch of uniform random
c numbers. MK can be set for convenience on a given system, since it does
c not affect the results.
implicit none
include 'npbparams.h'
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
double precision y,r23,r46,t23,t46,a1,a2,z,ah
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
> t46 = t23 ** 2)
cccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccccc
double precision Mops, epsilon, a, s, t1, t2, t3, t4, x, x1,
> x2, q, sx, sy, tm, an, tt, gc, dum(3),
> timer_read
integer mk, mm, nn, nk, nq, np, ierr, node, no_nodes,
> i, ik, kk, l, k, nit, ierrcode, no_large_nodes,
> np_add, k_offset, j
logical verified, timers_enabled
parameter (timers_enabled = .false.)
external timer_read
double precision qq, t1h, t2h, y1, y2, xh
character*13 size
parameter (mk = 16, mm = m - mk, nn = 2 ** mm,
> nk = 2 ** mk, nq = 10, epsilon=1.d-8,
> a = 1220703125.d0, s = 271828183.d0)
c common/storage/ x(2*nk), q(0:nq-1), qq(10000)
common/storage/ x(2*nk), q(0:9), qq(10000)
!DVM$ TEMPLATE TEM(nn)
!DVM$ DISTRIBUTE TEM (BLOCK)
data dum /1.d0, 1.d0, 1.d0/
c Because the size of the problem is too large to store in a 32-bit
c integer for some classes, we put it into a string (for printing).
c Have to strip off the decimal point put in there by the floating
c point print statement (internal file)
write(*, 1000)
write(size, '(f12.0)' ) 2.d0**(m+1)
do j =13,1,-1
if (size(j:j) .eq. '.') size(j:j) = ' '
end do
write (*,1001) size
1000 format(//,' NAS Parallel Benchmarks 3.3 - DVMH version',
> ' - EP Benchmark', /)
1001 format(' Number of random numbers generated: ', a14)
1003 format(' Number of active processes: ', i12, /)
verified = .false.
c Compute the number of "batches" of random number pairs generated
c per processor. Adjust if the number of processors does not evenly
c divide the total number
np = nn
c Call the random number generator functions and initialize
c the x-array to reduce the effects of paging on the timings.
c Also, call all mathematical functions that are used. Make
c sure these initializations cannot be eliminated as dead code.
call vranlc(0, dum(1), dum(2), dum(3))
call randlc(dum(2), dum(3), dum(1))
do 5 i = 1, 2*nk
x(i) = -1.d99
5 continue
Mops = log(sqrt(abs(max(1.d0,1.d0))))
call timer_clear(1)
call timer_clear(2)
call timer_clear(3)
call timer_start(1)
!DVM$ INTERVAL 1
call vranlc(0, t1, a, x)
c Compute AN = A ^ (2 * NK) (mod 2^46).
t1 = a
do 100 i = 1, mk + 1
call randlc(t1, t1, t2)
100 continue
an = t1
tt = s
gc = 0.d0
sx = 0.d0
sy = 0.d0
do 110 i = 0, nq - 1
q(i) = 0.d0
110 continue
c Each instance of this loop may be performed independently. We compute
c the k offsets separately to take into account the fact that some nodes
c have more numbers to generate than others
k_offset = -1
!DVM$ region
!DVM$ PARALLEL (k) ON TEM(k),REDUCTION(SUM(q),SUM(sx),SUM(sy))
!DVM$*,private(xh,i,kk,ik,t1,t2,y1,y2,a1,a2,x1,x2
!DVM$*,l,t3,t4),cuda_block(256)
do k = 1, np
kk = k_offset + k
t1 = s
t2 = an
c Find starting seed t1 for this kk.
do i = 1, 100
ik = kk / 2
if (2 * ik .ne. kk) then
call randlc(t1, t2, t3)
endif
if (ik .eq. 0) exit
call randlc(t2, t2, t3)
kk = ik
enddo
xh = t1
a1 = int (r23 * a)
a2 = a - t23 * a1
do i = 1, nk
call randNext (xh, y1, a1, a2)
call randNext (xh, y2, a1, a2)
x1 = 2.d0 * y1 - 1.d0
x2 = 2.d0 * y2 - 1.d0
t1 = x1 * x1 + x2 * x2
if (t1 .le. 1.d0) then
t2 = sqrt(-2.d0 * log(t1) / t1)
t3 = (x1 * t2)
t4 = (x2 * t2)
l = max(abs(t3), abs(t4))
q(l) = q(l) + 1.d0
sx = sx + t3
sy = sy + t4
endif
enddo
enddo
!DVM$ end region
do 160 i = 0, nq - 1
gc = gc + q(i)
160 continue
!DVM$ END INTERVAL
call timer_stop(1)
tm = timer_read(1)
nit=0
if (m.eq.24) then
if((abs((sx- (-3.247834652034740D3))/sx).le.epsilon).and.
> (abs((sy- (-6.958407078382297D3))/sy).le.epsilon))
> verified = .TRUE.
elseif (m.eq.25) then
if ((abs((sx- (-2.863319731645753D+03))/sx).le.epsilon).and.
> (abs((sy- (-6.320053679109499D+03))/sy).le.epsilon))
> verified = .TRUE.
elseif (m.eq.28) then
if ((abs((sx- (-4.295875165629892D3))/sx).le.epsilon).and.
> (abs((sy- (-1.580732573678431D4))/sy).le.epsilon))
> verified = .TRUE.
elseif (m.eq.30) then
if ((abs((sx- (4.033815542441498D4))/sx).le.epsilon).and.
> (abs((sy- (-2.660669192809235D4))/sy).le.epsilon))
> verified = .true.
elseif (m.eq.32) then
if ((abs((sx- (4.764367927995374D+4))/sx).le.epsilon).and.
> (abs((sy- (-8.084072988043731D+4))/sy).le.epsilon))
> verified = .true.
elseif (m.eq.36) then
if ((abs((sx- (1.982481200946593D+5))/sx).le.epsilon).and.
> (abs((sy- (-1.020596636361769D+5))/sy).le.epsilon))
> verified = .true.
elseif (m.eq.40) then
if ((abs((sx- (-5.319717441530D+05))/sx).le.epsilon).and.
> (abs((sy- (-3.688834557731D+05))/sy).le.epsilon))
> verified = .true.
else
verified = .false.
endif
Mops = 2.d0**(m+1)/tm/1000000.d0
write (6,11) tm, m, gc, sx, sy, (i, q(i), i = 0, nq - 1)
11 format (' EP Benchmark Results:'//' CPU Time =',f10.4/' N = 2^',
> i5/' No. Gaussian Pairs =',f15.0/' Sums = ',1p,2d25.15/
> ' Counts:'/(i3,0p,f15.0))
call print_results('EP', class, m+1, 0, 0, nit,
> tm, Mops,
> 'Random numbers generated',
> verified, npbversion)
if (timers_enabled) then
print *, 'Total time: ', timer_read(1)
print *, 'Gaussian pairs: ', timer_read(2)
print *, 'Random numbers: ', timer_read(3)
endif
end
subroutine print_results(name, class, n1, n2, n3, niter,
> t, mops, optype, verified, npbversion)
implicit none
character*2 name
character*1 class
integer n1, n2, n3, niter, j
double precision t, mops
character optype*24, size*13
logical verified
character*(*) npbversion
write (*, 2) name
2 format(//, ' ', A2, ' Benchmark Completed.')
write (*, 3) Class
3 format(' Class = ', 12x, a12)
c If this is not a grid-based problem (EP, FT, CG), then
c we only print n1, which contains some measure of the
c problem size. In that case, n2 and n3 are both zero.
c Otherwise, we print the grid size n1xn2xn3
if ((n2 .eq. 0) .and. (n3 .eq. 0)) then
if (name(1:2) .eq. 'EP') then
write(size, '(f12.0)' ) 2.d0**n1
do j =13,1,-1
if (size(j:j) .eq. '.') size(j:j) = ' '
end do
write (*,42) size
42 format(' Size = ',12x, a14)
else
write (*,44) n1
44 format(' Size = ',12x, i12)
endif
else
write (*, 4) n1,n2,n3
4 format(' Size = ',12x, i3,'x',i3,'x',i3)
endif
write (*, 5) niter
5 format(' Iterations = ', 12x, i12)
write (*, 6) t
6 format(' Time in seconds = ',12x, f12.2)
write (*,9) mops
9 format(' Mop/s total = ',12x, f12.2)
write(*, 11) optype
11 format(' Operation type = ', a24)
if (verified) then
write(*,12) ' SUCCESSFUL'
else
write(*,12) 'UNSUCCESSFUL'
endif
12 format(' Verification = ', 12x, a)
write(*,13) npbversion
13 format(' Version = ', 12x, a12)
write (*,130)
130 format(//' Please send the results of this run to:'//
> ' NPB Development Team '/
> ' Internet: npb@nas.nasa.gov'/
> ' '/
> ' If email is not available, send this to:'//
> ' MS T27A-1'/
> ' NASA Ames Research Center'/
> ' Moffett Field, CA 94035-1000'//
> ' Fax: 415-604-3957'//)
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
pure subroutine randlc (x, a, ret)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
intent(in)::a
intent(inout)::x
intent(out)::ret
double precision r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z
> ,ret
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
> t46 = t23 ** 2)
c---------------------------------------------------------------------
c Break A into two parts such that A = 2^23 * A1 + A2.
c---------------------------------------------------------------------
t1 = r23 * a
a1 = int (t1)
a2 = a - t23 * a1
c---------------------------------------------------------------------
c Break X into two parts such that X = 2^23 * X1 + X2, compute
c Z = A1 * X2 + A2 * X1 (mod 2^23), and then
c X = 2^23 * Z + A2 * X2 (mod 2^46).
c---------------------------------------------------------------------
t1 = r23 * x
x1 = int (t1)
x2 = x - t23 * x1
t1 = a1 * x2 + a2 * x1
t2 = int (r23 * t1)
z = t1 - t23 * t2
t3 = t23 * z + a2 * x2
t4 = int (r46 * t3)
x = t3 - t46 * t4
ret = r46 * x
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
pure subroutine randNext (x, ret, a1, a2)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
intent(inout)::x
intent(in)::a1, a2
intent(out)::ret
double precision r23,r46,t23,t46,x,t1,t2,t3,t4,a1,a2,x1,x2,z
> ,ret
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
> t46 = t23 ** 2)
c---------------------------------------------------------------------
c Break X into two parts such that X = 2^23 * X1 + X2, compute
c Z = A1 * X2 + A2 * X1 (mod 2^23), and then
c X = 2^23 * Z + A2 * X2 (mod 2^46).
c---------------------------------------------------------------------
t1 = r23 * x
x1 = int (t1)
x2 = x - t23 * x1
t1 = a1 * x2 + a2 * x1
t2 = int (r23 * t1)
z = t1 - t23 * t2
t3 = t23 * z + a2 * x2
t4 = int (r46 * t3)
x = t3 - t46 * t4
ret = r46 * x
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine vranlc (n, x, a, y)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
integer i,n
double precision y,r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z
dimension y(*)
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
> t46 = t23 ** 2)
c---------------------------------------------------------------------
c Break A into two parts such that A = 2^23 * A1 + A2.
c---------------------------------------------------------------------
t1 = r23 * a
a1 = int (t1)
a2 = a - t23 * a1
c---------------------------------------------------------------------
c Generate N results. This loop is not vectorizable.
c---------------------------------------------------------------------
do i = 1, n
c---------------------------------------------------------------------
c Break X into two parts such that X = 2^23 * X1 + X2, compute
c Z = A1 * X2 + A2 * X1 (mod 2^23), and then
c X = 2^23 * Z + A2 * X2 (mod 2^46).
c---------------------------------------------------------------------
t1 = r23 * x
x1 = int (t1)
x2 = x - t23 * x1
t1 = a1 * x2 + a2 * x1
t2 = int (r23 * t1)
z = t1 - t23 * t2
t3 = t23 * z + a2 * x2
t4 = int (r46 * t3)
x = t3 - t46 * t4
y(i) = r46 * x
enddo
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine timer_clear(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
elapsed(n) = 0.0
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine timer_start(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
external elapsed_time
double precision elapsed_time
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
!DVM$ BARRIER
start(n) = elapsed_time()
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine timer_stop(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
external elapsed_time
double precision elapsed_time
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
double precision t, now
now = elapsed_time()
t = now - start(n)
elapsed(n) = elapsed(n) + t
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
double precision function timer_read(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
timer_read = elapsed(n)
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
double precision function elapsed_time()
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
double precision t
double precision dvtime
data t/0.d0/
elapsed_time = dvtime()
return
end

View File

@@ -0,0 +1,12 @@
@echo off
set CLASS=%1
set OPT=%2
CALL ..\sys\setparams EP %CLASS%
CALL %F77% %OPT% ep 1>out_%CLASS%.txt 2>err_%CLASS%.txt
if exist ep.exe (
copy ep.exe %BIN%\ep.%CLASS%.x.exe
del ep.exe
)

View File

@@ -0,0 +1,21 @@
SHELL=/bin/sh
BENCHMARK=ft
BENCHMARKU=FT
include ../config/make.def
include ../sys/make.common
SOURCES = ft.fdv
OBJS = ${SOURCES:.fdv=.o}
${PROGRAM}: config $(OBJS)
${FLINK} -o ${PROGRAM} ${OBJS}
%.o: %.fdv npbparams.h global.h
${F77} ${FFLAGS} -f90 -c -o $@ $<
clean:
rm -f npbparams.h
rm -f *.o *~
rm -f *.cu *.cuf *.c *.f

View File

@@ -0,0 +1,3 @@
integer dvm_debug
parameter (dvm_debug=0)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,80 @@
include 'npbparams.h'
! If processor array is 1x1 -> 0D grid decomposition
! Cache blocking params. These values are good for most
! RISC processors.
! FFT parameters:
! fftblock controls how many ffts are done at a time.
! The default is appropriate for most cache-based machines
! On vector machines, the FFT can be vectorized with vector
! length equal to the block size, so the block size should
! be as large as possible. This is the size of the smallest
! dimension of the problem: 128 for class A, 256 for class B and
! 512 for class C.
! we need a bunch of logic to keep track of how
! arrays are laid out.
! Note: this serial version is the derived from the parallel 0D case
! of the ft NPB.
! The computation proceeds logically as
! set up initial conditions
! fftx(1)
! transpose (1->2)
! ffty(2)
! transpose (2->3)
! fftz(3)
! time evolution
! fftz(3)
! transpose (3->2)
! ffty(2)
! transpose (2->1)
! fftx(1)
! compute residual(1)
! for the 0D, 1D, 2D strategies, the layouts look like xxx
!
! 0D 1D 2D
! 1: xyz xyz xyz
integer T_total, T_setup, T_fft, T_evolve, T_checksum, T_fftx, T_ffty, T_fftz, T_max
parameter (T_total = 1, T_setup = 2, T_fft = 3, T_evolve = 4, T_checksum = 5, T_max = 5)
logical timers_enabled
parameter (timers_enabled = .FALSE.)
logical more_memory
parameter (more_memory = .FALSE.)
external timer_read
double precision timer_read
external ilog2
integer ilog2
external randlc
double precision randlc
double precision seed, a, pi, alpha
parameter (seed = 314159265.d0, a = 1220703125.d0, pi = 3.141592653589793238d0, alpha=1.0d-6)
double complex u0(nxp,ny,nz), u1(nxp,ny,nz)
double precision twiddle(nxp,ny,nz)
double complex u(nxp)
double complex y1(maxdim),y2(maxdim)
common /arrays/ u,u0,u1,twiddle,y1,y2
!dvm$ distribute (*,*,*) :: twiddle
!dvm$ distribute (*,*,*) :: u0
!dvm$ distribute (*,*,*) :: u1
!dvm$ distribute (*) :: u

View File

@@ -0,0 +1,12 @@
@echo off
set CLASS=%1
set OPT=%2
CALL ..\sys\setparams FT %CLASS%
CALL %F77% %OPT% ft 1>out_%CLASS%.txt 2>err_%CLASS%.txt
if exist ft.exe (
copy ft.exe %BIN%\ft.%CLASS%.x.exe
del ft.exe
)

View File

@@ -0,0 +1,44 @@
SHELL=/bin/sh
BENCHMARK=lu
BENCHMARKU=LU
include ../config/make.def
include ../sys/make.common
OBJS = lu.o read_input.o \
domain.o setcoeff.o setbv.o exact.o setiv.o \
erhs.o ssor.o rhs.o l2norm.o error.o \
pintgr.o verify.o print_results.o timers.o
${PROGRAM}: config
${MAKE} exec
exec: $(OBJS)
${FLINK} ${FLINKFLAGS} -o ${PROGRAM} ${OBJS} ${F_LIB}
.f.o :
${F77} ${FFLAGS} -c -o $@ $<
lu.o: lu.f applu.incl npbparams.h
erhs.o: erhs.f applu.incl npbparams.h
error.o: error.f applu.incl npbparams.h
exact.o: exact.f applu.incl npbparams.h
l2norm.o: l2norm.f
pintgr.o: pintgr.f applu.incl npbparams.h
read_input.o: read_input.f applu.incl npbparams.h
rhs.o: rhs.f applu.incl npbparams.h
setbv.o: setbv.f applu.incl npbparams.h
setiv.o: setiv.f applu.incl npbparams.h
setcoeff.o: setcoeff.f applu.incl npbparams.h
ssor.o: ssor.f applu.incl npbparams.h
domain.o: domain.f applu.incl npbparams.h
verify.o: verify.f applu.incl npbparams.h
print_results.o: print_results.f
timers.o: timers.f
clean:
- /bin/rm -f npbparams.h
- /bin/rm -f *.o *DVMH* *~
- /bin/rm -f *.cu *.cuf

View File

@@ -0,0 +1,185 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c--- applu.incl
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c npbparams.h defines parameters that depend on the class and
c number of nodes
c---------------------------------------------------------------------
include 'npbparams.h'
c---------------------------------------------------------------------
c parameters which can be overridden in runtime config file
c isiz1,isiz2,isiz3 give the maximum size
c ipr = 1 to print out verbose information
c omega = 2.0 is correct for all classes
c tolrsd is tolerance levels for steady state residuals
c---------------------------------------------------------------------
integer ipr_default,iS,jS,kS
parameter (ipr_default = 1)
double precision omega_default
parameter (omega_default = 1.2d0)
double precision tolrsd1_def, tolrsd2_def, tolrsd3_def,
> tolrsd4_def, tolrsd5_def
parameter (tolrsd1_def=1.0e-08,
> tolrsd2_def=1.0e-08, tolrsd3_def=1.0e-08,
> tolrsd4_def=1.0e-08, tolrsd5_def=1.0e-08)
double precision c1, c2, c3, c4, c5
parameter( c1 = 1.40d+00, c2 = 0.40d+00,
> c3 = 1.00d-01, c4 = 1.00d+00,
> c5 = 1.40d+00,
> iS =isiz1/2*2+1,jS= isiz2/2*2+1, kS=isiz3 )
c---------------------------------------------------------------------
c grid
c---------------------------------------------------------------------
integer nx, ny, nz
integer nx0, ny0, nz0
integer ist, iend
integer jst, jend
integer ii1, ii2
integer ji1, ji2
integer ki1, ki2
double precision dxi, deta, dzeta
double precision tx1, tx2, tx3
double precision ty1, ty2, ty3
double precision tz1, tz2, tz3
common/cgcon/ dxi, deta, dzeta,
> tx1, tx2, tx3,
> ty1, ty2, ty3,
> tz1, tz2, tz3,
> nx, ny, nz,
> nx0, ny0, nz0,
> ist, iend,
> jst, jend,
> ii1, ii2,
> ji1, ji2,
> ki1, ki2
c---------------------------------------------------------------------
c dissipation
c---------------------------------------------------------------------
double precision dx1, dx2, dx3, dx4, dx5
double precision dy1, dy2, dy3, dy4, dy5
double precision dz1, dz2, dz3, dz4, dz5
double precision dssp
common/disp/ dx1,dx2,dx3,dx4,dx5,
> dy1,dy2,dy3,dy4,dy5,
> dz1,dz2,dz3,dz4,dz5,
> dssp
c---------------------------------------------------------------------
c field variables and residuals
c to improve cache performance, second two dimensions padded by 1
c for even number sizes only.
c note: corresponding array (called "v") in routines blts, buts,
c and l2norm are similarly padded
c---------------------------------------------------------------------
!DVM$ ALIGN frct(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX2,iEX3,iE
!DVM$&X4)
!DVM$ ALIGN qs(iEX1,iEX2,iEX3) WITH dvmh_temp0(*,iEX1,iEX2,iEX3)
!DVM$ ALIGN rho_i(iEX1,iEX2,iEX3) WITH dvmh_temp0(*,iEX1,iEX2,iEX3)
!DVM$ ALIGN rsd(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX2,iEX3,iEX
!DVM$&4)
!DVM$ ALIGN u(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX2,iEX3,iEX4)
!DVM$ DYNAMIC u,rsd,frct,qs,rho_i
!DVM$ SHADOW qs(1:1,1:1,1:1)
!DVM$ SHADOW rho_i(1:1,1:1,1:1)
!DVM$ SHADOW rsd(0:0,2:2,2:2,2:2)
!DVM$ SHADOW frct(0:0,2:2,2:2,2:2)
!DVM$ SHADOW u(0:0,2:2,2:2,2:2)
!DVM$ TEMPLATE, COMMON :: dvmh_temp0(1:6,0:iS+1,0:jS+1,0:kS+1)
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r1(1:6,0:iS+1,0:jS+1,0:kS+1)
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r2(1:6,0:iS+1,0:jS+1,0:kS+1)
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r3(1:6,0:iS+1,0:jS+1,0:kS+1)
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r0(1:6,0:iS+1,0:jS+1,0:kS+1)
!DVM$ DISTRIBUTE dvmh_temp0(*,BLOCK,BLOCK,BLOCK)
!DVM$ DISTRIBUTE dvmh_temp0_r1(*,BLOCK,BLOCK,*)
!DVM$ DISTRIBUTE dvmh_temp0_r2(*,BLOCK,*,BLOCK)
!DVM$ DISTRIBUTE dvmh_temp0_r3(*,*,BLOCK,BLOCK)
!DVM$ DISTRIBUTE dvmh_temp0_r0(*,*,*,BLOCK)
!DVM$ DYNAMIC dvmh_temp0, dvmh_temp0_r1, dvmh_temp0_r2, dvmh_temp0_r3,
!DVM$&dvmh_temp0_r0
double precision u(5,isiz1/2*2+1,
> isiz2/2*2+1,
> isiz3),
> rsd(5,isiz1/2*2+1,
> isiz2/2*2+1,
> isiz3),
> frct(5,isiz1/2*2+1,
> isiz2/2*2+1,
> isiz3),
> flux(5,isiz1),
> qs(isiz1,isiz2,isiz3),
> rho_i(isiz1/2*2+1,isiz2/2*2+1,isiz3)
common/cvar/ u, rsd, frct, flux,
> qs, rho_i
c---------------------------------------------------------------------
c output control parameters
c---------------------------------------------------------------------
integer ipr, inorm
common/cprcon/ ipr, inorm
c---------------------------------------------------------------------
c newton-raphson iteration control parameters
c---------------------------------------------------------------------
integer itmax, invert
double precision dt, omega, tolrsd(5),
> rsdnm(5), errnm(5), frc, ttotal
common/ctscon/ dt, omega, tolrsd,
> rsdnm, errnm, frc, ttotal,
> itmax, invert
double precision a(5,5,isiz1/2*2+1,isiz2),
> b(5,5,isiz1/2*2+1,isiz2),
> c(5,5,isiz1/2*2+1,isiz2),
> d(5,5,isiz1/2*2+1,isiz2)
common/cjac/ a, b, c, d
c---------------------------------------------------------------------
c coefficients of the exact solution
c---------------------------------------------------------------------
double precision ce(5,13)
common/cexact/ ce
c---------------------------------------------------------------------
c timers
c---------------------------------------------------------------------
integer t_rhsx,t_rhsy,t_rhsz,t_rhs,t_jacld,t_blts,
> t_jacu,t_buts,t_add,t_l2norm,t_last,t_total
parameter (t_total = 1)
parameter (t_rhsx = 2)
parameter (t_rhsy = 3)
parameter (t_rhsz = 4)
parameter (t_rhs = 5)
parameter (t_jacld = 6)
parameter (t_blts = 7)
parameter (t_jacu = 8)
parameter (t_buts = 9)
parameter (t_add = 10)
parameter (t_l2norm = 11)
parameter (t_last = 11)
logical timeron
double precision maxtime
common/timer/maxtime,timeron
c---------------------------------------------------------------------
c end of include file
c---------------------------------------------------------------------

View File

@@ -0,0 +1,79 @@
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine domain ()
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! local variables
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! end of include file
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
implicit none
!---------------------------------------------------------------------
! end of include file
include 'applu.incl'
!---------------------------------------------------------------------
nx = nx0
ny = ny0
nz = nz0
!---------------------------------------------------------------------
! check the sub-domain size
!---------------------------------------------------------------------
if (nx .lt. 4 .or. ny .lt. 4 .or. nz .lt. 4) then
write (unit = *,fmt = 2001) nx,ny,nz
2001 format (5x,'SUBDOMAIN SIZE IS TOO SMALL - ', /5x,'
&ADJUST PROBLEM SIZE OR NUMBER OF PROCESSORS', /5x,'SO THAT
&NX, NY AND NZ ARE GREATER THAN OR EQUAL', /5x,'TO 4 THEY AR
&E CURRENTLY', 3I3)
stop
endif
if (nx .gt. isiz1 .or. ny .gt. isiz2 .or. nz .gt. isiz3) then
write (unit = *,fmt = 2002) nx,ny,nz
2002 format (5x,'SUBDOMAIN SIZE IS TOO LARGE - ', /5x,'
&ADJUST PROBLEM SIZE OR NUMBER OF PROCESSORS', /5x,'SO THAT
&NX, NY AND NZ ARE LESS THAN OR EQUAL TO ', /5x,'ISIZ1, ISIZ
&2 AND ISIZ3 RESPECTIVELY. THEY ARE', /5x,'CURRENTLY', 3I4)
stop
endif
!---------------------------------------------------------------------
! set up the start and end in i and j extents for all processors
!---------------------------------------------------------------------
ist = 2
iend = nx - 1
jst = 2
jend = ny - 1
ii1 = 2
ii2 = nx0 - 1
ji1 = 2
ji2 = ny0 - 2
ki1 = 3
ki2 = nz0 - 1
return
end

View File

@@ -0,0 +1,369 @@
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine erhs ()
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!
! compute the right hand side based on exact solution
!
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! local variables
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! end of include file
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
implicit none
!---------------------------------------------------------------------
! end of include file
include 'applu.incl'
!---------------------------------------------------------------------
integer i,j,k,m
double precision xi,eta,zeta
double precision q
double precision u21,u31,u41
double precision tmp
double precision u21i,u31i,u41i,u51i
double precision u21j,u31j,u41j,u51j
double precision u21k,u31k,u41k,u51k
double precision u21im1,u31im1,u41im1,u51im1
double precision u21jm1,u31jm1,u41jm1,u51jm1
double precision u21km1,u31km1,u41km1,u51km1
!DVM$ PARALLEL (k,j,i,m) ON frct(m,i,j,k), PRIVATE (m,i,j,k)
do k = 1,nz
do j = 1,ny
do i = 1,nx
do m = 1,5
frct(m,i,j,k) = 0.0d+00
enddo
enddo
enddo
enddo
!DVM$ PARALLEL (k,j,i) ON rsd(*,i,j,k), PRIVATE (m,i,j,k,xi,zeta,eta)
do k = 1,nz
do j = 1,ny
do i = 1,nx
zeta = dble (k - 1) / (nz - 1)
eta = dble (j - 1) / (ny0 - 1)
xi = dble (i - 1) / (nx0 - 1)
do m = 1,5
rsd(m,i,j,k) = ce(m,1) + (ce(m,2) + (ce(m,5) + (ce(m,8
&) + ce(m,11) * xi) * xi) * xi) * xi + (ce(m,3) + (ce(m,6) + (ce(m,
&9) + ce(m,12) * eta) * eta) * eta) * eta + (ce(m,4) + (ce(m,7) + (
&ce(m,10) + ce(m,13) * zeta) * zeta) * zeta) * zeta
enddo
enddo
enddo
enddo
!---------------------------------------------------------------------
! xi-direction flux differences
!---------------------------------------------------------------------
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r3(iEX0,iEX1,iEX2,iE
!DVM$&X3) :: rsd
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r3(iEX0,iEX1,iEX2,iE
!DVM$&X3) :: frct
!DVM$ PARALLEL (k,j) ON frct(*,*,j,k), PRIVATE (m,i,j,q,tmp,k,flux,u31i,
!DVM$&u41i,u51i,u21i,u21,u31im1,u41im1,u21im1,u51im1)
do k = 2,nz - 1
do j = jst,jend
do i = 1,nx
flux(1,i) = rsd(2,i,j,k)
u21 = rsd(2,i,j,k) / rsd(1,i,j,k)
q = 0.50d+00 * (rsd(2,i,j,k) * rsd(2,i,j,k) + rsd(3,i,j,k
&) * rsd(3,i,j,k) + rsd(4,i,j,k) * rsd(4,i,j,k)) / rsd(1,i,j,k)
flux(2,i) = rsd(2,i,j,k) * u21 + c2 * (rsd(5,i,j,k) - q)
flux(3,i) = rsd(3,i,j,k) * u21
flux(4,i) = rsd(4,i,j,k) * u21
flux(5,i) = (c1 * rsd(5,i,j,k) - c2 * q) * u21
enddo
do i = ist,iend
do m = 1,5
frct(m,i,j,k) = frct(m,i,j,k) - tx2 * (flux(m,i + 1) -
& flux(m,i - 1))
enddo
enddo
do i = ist,nx
tmp = 1.0d+00 / rsd(1,i,j,k)
u21i = tmp * rsd(2,i,j,k)
u31i = tmp * rsd(3,i,j,k)
u41i = tmp * rsd(4,i,j,k)
u51i = tmp * rsd(5,i,j,k)
tmp = 1.0d+00 / rsd(1,i - 1,j,k)
u21im1 = tmp * rsd(2,i - 1,j,k)
u31im1 = tmp * rsd(3,i - 1,j,k)
u41im1 = tmp * rsd(4,i - 1,j,k)
u51im1 = tmp * rsd(5,i - 1,j,k)
flux(2,i) = 4.0d+00 / 3.0d+00 * tx3 * (u21i - u21im1)
flux(3,i) = tx3 * (u31i - u31im1)
flux(4,i) = tx3 * (u41i - u41im1)
flux(5,i) = 0.50d+00 * (1.0d+00 - c1 * c5) * tx3 * (u21i*
&* 2 + u31i** 2 + u41i** 2 - (u21im1** 2 + u31im1** 2 + u41im1** 2)
&) + 1.0d+00 / 6.0d+00 * tx3 * (u21i** 2 - u21im1** 2) + c1 * c5 *
&tx3 * (u51i - u51im1)
enddo
do i = ist,iend
frct(1,i,j,k) = frct(1,i,j,k) + dx1 * tx1 * (rsd(1,i - 1,
&j,k) - 2.0d+00 * rsd(1,i,j,k) + rsd(1,i + 1,j,k))
frct(2,i,j,k) = frct(2,i,j,k) + tx3 * c3 * c4 * (flux(2,i
& + 1) - flux(2,i)) + dx2 * tx1 * (rsd(2,i - 1,j,k) - 2.0d+00 * rsd
&(2,i,j,k) + rsd(2,i + 1,j,k))
frct(3,i,j,k) = frct(3,i,j,k) + tx3 * c3 * c4 * (flux(3,i
& + 1) - flux(3,i)) + dx3 * tx1 * (rsd(3,i - 1,j,k) - 2.0d+00 * rsd
&(3,i,j,k) + rsd(3,i + 1,j,k))
frct(4,i,j,k) = frct(4,i,j,k) + tx3 * c3 * c4 * (flux(4,i
& + 1) - flux(4,i)) + dx4 * tx1 * (rsd(4,i - 1,j,k) - 2.0d+00 * rsd
&(4,i,j,k) + rsd(4,i + 1,j,k))
frct(5,i,j,k) = frct(5,i,j,k) + tx3 * c3 * c4 * (flux(5,i
& + 1) - flux(5,i)) + dx5 * tx1 * (rsd(5,i - 1,j,k) - 2.0d+00 * rsd
&(5,i,j,k) + rsd(5,i + 1,j,k))
enddo
!---------------------------------------------------------------------
! Fourth-order dissipation
!---------------------------------------------------------------------
do m = 1,5
frct(m,2,j,k) = frct(m,2,j,k) - dssp * ((+(5.0d+00)) * rs
&d(m,2,j,k) - 4.0d+00 * rsd(m,3,j,k) + rsd(m,4,j,k))
frct(m,3,j,k) = frct(m,3,j,k) - dssp * ((-(4.0d+00)) * rs
&d(m,2,j,k) + 6.0d+00 * rsd(m,3,j,k) - 4.0d+00 * rsd(m,4,j,k) + rsd
&(m,5,j,k))
enddo
do i = 4,nx - 3
do m = 1,5
frct(m,i,j,k) = frct(m,i,j,k) - dssp * (rsd(m,i - 2,j,
&k) - 4.0d+00 * rsd(m,i - 1,j,k) + 6.0d+00 * rsd(m,i,j,k) - 4.0d+00
& * rsd(m,i + 1,j,k) + rsd(m,i + 2,j,k))
enddo
enddo
do m = 1,5
frct(m,nx - 2,j,k) = frct(m,nx - 2,j,k) - dssp * (rsd(m,n
&x - 4,j,k) - 4.0d+00 * rsd(m,nx - 3,j,k) + 6.0d+00 * rsd(m,nx - 2,
&j,k) - 4.0d+00 * rsd(m,nx - 1,j,k))
frct(m,nx - 1,j,k) = frct(m,nx - 1,j,k) - dssp * (rsd(m,n
&x - 3,j,k) - 4.0d+00 * rsd(m,nx - 2,j,k) + 5.0d+00 * rsd(m,nx - 1,
&j,k))
enddo
enddo
enddo
!---------------------------------------------------------------------
! eta-direction flux differences
!---------------------------------------------------------------------
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
!DVM$& :: rsd
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
!DVM$& :: frct
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r2(iEX0,iEX1,iEX2,iE
!DVM$&X3) :: rsd
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r2(iEX0,iEX1,iEX2,iE
!DVM$&X3) :: frct
!DVM$ PARALLEL (k,i) ON frct(*,i,*,k), PRIVATE (m,i,u31,j,q,tmp,u31j,u41
!DVM$&j,u41jm1,u51jm1,u21j,u31jm1,k,u21jm1,u51j,flux)
do k = 2,nz - 1
do i = ist,iend
do j = 1,ny
flux(1,j) = rsd(3,i,j,k)
u31 = rsd(3,i,j,k) / rsd(1,i,j,k)
q = 0.50d+00 * (rsd(2,i,j,k) * rsd(2,i,j,k) + rsd(3,i,j,k
&) * rsd(3,i,j,k) + rsd(4,i,j,k) * rsd(4,i,j,k)) / rsd(1,i,j,k)
flux(2,j) = rsd(2,i,j,k) * u31
flux(3,j) = rsd(3,i,j,k) * u31 + c2 * (rsd(5,i,j,k) - q)
flux(4,j) = rsd(4,i,j,k) * u31
flux(5,j) = (c1 * rsd(5,i,j,k) - c2 * q) * u31
enddo
do j = jst,jend
do m = 1,5
frct(m,i,j,k) = frct(m,i,j,k) - ty2 * (flux(m,j + 1) -
& flux(m,j - 1))
enddo
enddo
do j = jst,ny
tmp = 1.0d+00 / rsd(1,i,j,k)
u21j = tmp * rsd(2,i,j,k)
u31j = tmp * rsd(3,i,j,k)
u41j = tmp * rsd(4,i,j,k)
u51j = tmp * rsd(5,i,j,k)
tmp = 1.0d+00 / rsd(1,i,j - 1,k)
u21jm1 = tmp * rsd(2,i,j - 1,k)
u31jm1 = tmp * rsd(3,i,j - 1,k)
u41jm1 = tmp * rsd(4,i,j - 1,k)
u51jm1 = tmp * rsd(5,i,j - 1,k)
flux(2,j) = ty3 * (u21j - u21jm1)
flux(3,j) = 4.0d+00 / 3.0d+00 * ty3 * (u31j - u31jm1)
flux(4,j) = ty3 * (u41j - u41jm1)
flux(5,j) = 0.50d+00 * (1.0d+00 - c1 * c5) * ty3 * (u21j*
&* 2 + u31j** 2 + u41j** 2 - (u21jm1** 2 + u31jm1** 2 + u41jm1** 2)
&) + 1.0d+00 / 6.0d+00 * ty3 * (u31j** 2 - u31jm1** 2) + c1 * c5 *
&ty3 * (u51j - u51jm1)
enddo
do j = jst,jend
frct(1,i,j,k) = frct(1,i,j,k) + dy1 * ty1 * (rsd(1,i,j -
&1,k) - 2.0d+00 * rsd(1,i,j,k) + rsd(1,i,j + 1,k))
frct(2,i,j,k) = frct(2,i,j,k) + ty3 * c3 * c4 * (flux(2,j
& + 1) - flux(2,j)) + dy2 * ty1 * (rsd(2,i,j - 1,k) - 2.0d+00 * rsd
&(2,i,j,k) + rsd(2,i,j + 1,k))
frct(3,i,j,k) = frct(3,i,j,k) + ty3 * c3 * c4 * (flux(3,j
& + 1) - flux(3,j)) + dy3 * ty1 * (rsd(3,i,j - 1,k) - 2.0d+00 * rsd
&(3,i,j,k) + rsd(3,i,j + 1,k))
frct(4,i,j,k) = frct(4,i,j,k) + ty3 * c3 * c4 * (flux(4,j
& + 1) - flux(4,j)) + dy4 * ty1 * (rsd(4,i,j - 1,k) - 2.0d+00 * rsd
&(4,i,j,k) + rsd(4,i,j + 1,k))
frct(5,i,j,k) = frct(5,i,j,k) + ty3 * c3 * c4 * (flux(5,j
& + 1) - flux(5,j)) + dy5 * ty1 * (rsd(5,i,j - 1,k) - 2.0d+00 * rsd
&(5,i,j,k) + rsd(5,i,j + 1,k))
enddo
!---------------------------------------------------------------------
! fourth-order dissipation
!---------------------------------------------------------------------
do m = 1,5
frct(m,i,2,k) = frct(m,i,2,k) - dssp * ((+(5.0d+00)) * rs
&d(m,i,2,k) - 4.0d+00 * rsd(m,i,3,k) + rsd(m,i,4,k))
frct(m,i,3,k) = frct(m,i,3,k) - dssp * ((-(4.0d+00)) * rs
&d(m,i,2,k) + 6.0d+00 * rsd(m,i,3,k) - 4.0d+00 * rsd(m,i,4,k) + rsd
&(m,i,5,k))
enddo
do j = 4,ny - 3
do m = 1,5
frct(m,i,j,k) = frct(m,i,j,k) - dssp * (rsd(m,i,j - 2,
&k) - 4.0d+00 * rsd(m,i,j - 1,k) + 6.0d+00 * rsd(m,i,j,k) - 4.0d+00
& * rsd(m,i,j + 1,k) + rsd(m,i,j + 2,k))
enddo
enddo
do m = 1,5
frct(m,i,ny - 2,k) = frct(m,i,ny - 2,k) - dssp * (rsd(m,i
&,ny - 4,k) - 4.0d+00 * rsd(m,i,ny - 3,k) + 6.0d+00 * rsd(m,i,ny -
&2,k) - 4.0d+00 * rsd(m,i,ny - 1,k))
frct(m,i,ny - 1,k) = frct(m,i,ny - 1,k) - dssp * (rsd(m,i
&,ny - 3,k) - 4.0d+00 * rsd(m,i,ny - 2,k) + 5.0d+00 * rsd(m,i,ny -
&1,k))
enddo
enddo
enddo
!---------------------------------------------------------------------
! zeta-direction flux differences
!---------------------------------------------------------------------
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
!DVM$& :: rsd
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
!DVM$& :: frct
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r1(iEX0,iEX1,iEX2,iE
!DVM$&X3) :: rsd
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r1(iEX0,iEX1,iEX2,iE
!DVM$&X3) :: frct
!DVM$ PARALLEL (j,i) ON frct(*,i,j,*), PRIVATE (m,i,j,q,tmp,u41,k,u51k,u
!DVM$&31km1,u21k,u21km1,u41k,u31k,u51km1,u41km1,flux)
do j = jst,jend
do i = ist,iend
do k = 1,nz
flux(1,k) = rsd(4,i,j,k)
u41 = rsd(4,i,j,k) / rsd(1,i,j,k)
q = 0.50d+00 * (rsd(2,i,j,k) * rsd(2,i,j,k) + rsd(3,i,j,k
&) * rsd(3,i,j,k) + rsd(4,i,j,k) * rsd(4,i,j,k)) / rsd(1,i,j,k)
flux(2,k) = rsd(2,i,j,k) * u41
flux(3,k) = rsd(3,i,j,k) * u41
flux(4,k) = rsd(4,i,j,k) * u41 + c2 * (rsd(5,i,j,k) - q)
flux(5,k) = (c1 * rsd(5,i,j,k) - c2 * q) * u41
enddo
do k = 2,nz - 1
do m = 1,5
frct(m,i,j,k) = frct(m,i,j,k) - tz2 * (flux(m,k + 1) -
& flux(m,k - 1))
enddo
enddo
do k = 2,nz
tmp = 1.0d+00 / rsd(1,i,j,k)
u21k = tmp * rsd(2,i,j,k)
u31k = tmp * rsd(3,i,j,k)
u41k = tmp * rsd(4,i,j,k)
u51k = tmp * rsd(5,i,j,k)
tmp = 1.0d+00 / rsd(1,i,j,k - 1)
u21km1 = tmp * rsd(2,i,j,k - 1)
u31km1 = tmp * rsd(3,i,j,k - 1)
u41km1 = tmp * rsd(4,i,j,k - 1)
u51km1 = tmp * rsd(5,i,j,k - 1)
flux(2,k) = tz3 * (u21k - u21km1)
flux(3,k) = tz3 * (u31k - u31km1)
flux(4,k) = 4.0d+00 / 3.0d+00 * tz3 * (u41k - u41km1)
flux(5,k) = 0.50d+00 * (1.0d+00 - c1 * c5) * tz3 * (u21k*
&* 2 + u31k** 2 + u41k** 2 - (u21km1** 2 + u31km1** 2 + u41km1** 2)
&) + 1.0d+00 / 6.0d+00 * tz3 * (u41k** 2 - u41km1** 2) + c1 * c5 *
&tz3 * (u51k - u51km1)
enddo
do k = 2,nz - 1
frct(1,i,j,k) = frct(1,i,j,k) + dz1 * tz1 * (rsd(1,i,j,k
&+ 1) - 2.0d+00 * rsd(1,i,j,k) + rsd(1,i,j,k - 1))
frct(2,i,j,k) = frct(2,i,j,k) + tz3 * c3 * c4 * (flux(2,k
& + 1) - flux(2,k)) + dz2 * tz1 * (rsd(2,i,j,k + 1) - 2.0d+00 * rsd
&(2,i,j,k) + rsd(2,i,j,k - 1))
frct(3,i,j,k) = frct(3,i,j,k) + tz3 * c3 * c4 * (flux(3,k
& + 1) - flux(3,k)) + dz3 * tz1 * (rsd(3,i,j,k + 1) - 2.0d+00 * rsd
&(3,i,j,k) + rsd(3,i,j,k - 1))
frct(4,i,j,k) = frct(4,i,j,k) + tz3 * c3 * c4 * (flux(4,k
& + 1) - flux(4,k)) + dz4 * tz1 * (rsd(4,i,j,k + 1) - 2.0d+00 * rsd
&(4,i,j,k) + rsd(4,i,j,k - 1))
frct(5,i,j,k) = frct(5,i,j,k) + tz3 * c3 * c4 * (flux(5,k
& + 1) - flux(5,k)) + dz5 * tz1 * (rsd(5,i,j,k + 1) - 2.0d+00 * rsd
&(5,i,j,k) + rsd(5,i,j,k - 1))
enddo
!---------------------------------------------------------------------
! fourth-order dissipation
!---------------------------------------------------------------------
do m = 1,5
frct(m,i,j,2) = frct(m,i,j,2) - dssp * ((+(5.0d+00)) * rs
&d(m,i,j,2) - 4.0d+00 * rsd(m,i,j,3) + rsd(m,i,j,4))
frct(m,i,j,3) = frct(m,i,j,3) - dssp * ((-(4.0d+00)) * rs
&d(m,i,j,2) + 6.0d+00 * rsd(m,i,j,3) - 4.0d+00 * rsd(m,i,j,4) + rsd
&(m,i,j,5))
enddo
do k = 4,nz - 3
do m = 1,5
frct(m,i,j,k) = frct(m,i,j,k) - dssp * (rsd(m,i,j,k -
&2) - 4.0d+00 * rsd(m,i,j,k - 1) + 6.0d+00 * rsd(m,i,j,k) - 4.0d+00
& * rsd(m,i,j,k + 1) + rsd(m,i,j,k + 2))
enddo
enddo
do m = 1,5
frct(m,i,j,nz - 2) = frct(m,i,j,nz - 2) - dssp * (rsd(m,i
&,j,nz - 4) - 4.0d+00 * rsd(m,i,j,nz - 3) + 6.0d+00 * rsd(m,i,j,nz
&- 2) - 4.0d+00 * rsd(m,i,j,nz - 1))
frct(m,i,j,nz - 1) = frct(m,i,j,nz - 1) - dssp * (rsd(m,i
&,j,nz - 3) - 4.0d+00 * rsd(m,i,j,nz - 2) + 5.0d+00 * rsd(m,i,j,nz
&- 1))
enddo
enddo
enddo
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
!DVM$& :: rsd
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
!DVM$& :: frct
return
end

View File

@@ -0,0 +1,77 @@
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine error ()
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!
! compute the solution error
!
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! local variables
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! end of include file
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
implicit none
!---------------------------------------------------------------------
! end of include file
include 'applu.incl'
!---------------------------------------------------------------------
integer i,j,k,m
double precision tmp
double precision u000ijk(5)
do m = 1,5
errnm(m) = 0.0d+00
enddo
!DVM$ PARALLEL (k,j,i) ON u(*,i,j,k), PRIVATE (tmp,m,k,u000ijk,i,j),REDU
!DVM$&CTION (sum (errnm))
do k = 2,nz - 1
do j = jst,jend
do i = ist,iend
call exact(i,j,k,u000ijk)
do m = 1,5
tmp = u000ijk(m) - u(m,i,j,k)
errnm(m) = errnm(m) + tmp** 2
enddo
enddo
enddo
enddo
do m = 1,5
errnm(m) = sqrt (errnm(m) / ((nx0 - 2) * (ny0 - 2) * (nz0 - 2))
&)
enddo
! write (*,1002) ( errnm(m), m = 1, 5 )
1002 format (1x/1x,'RMS-norm of error in soln. to ', 'first pde = ',1p
&e12.5/, 1x,'RMS-norm of error in soln. to ', 'second pde = ',1pe12
&.5/, 1x,'RMS-norm of error in soln. to ', 'third pde = ',1pe12.5/
&, 1x,'RMS-norm of error in soln. to ', 'fourth pde = ',1pe12.5/, 1
&x,'RMS-norm of error in soln. to ', 'fifth pde = ',1pe12.5)
return
end

View File

@@ -0,0 +1,64 @@
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine exact (i, j, k, u000ijk)
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!
! compute the exact solution at (i,j,k)
!
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! input parameters
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! end of include file
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
implicit none
!---------------------------------------------------------------------
! end of include file
include 'applu.incl'
!---------------------------------------------------------------------
integer i,j,k
double precision u000ijk(*)
!---------------------------------------------------------------------
! local variables
!---------------------------------------------------------------------
integer m
double precision xi,eta,zeta
xi = dble (i - 1) / (nx0 - 1)
eta = dble (j - 1) / (ny0 - 1)
zeta = dble (k - 1) / (nz - 1)
do m = 1,5
u000ijk(m) = ce(m,1) + (ce(m,2) + (ce(m,5) + (ce(m,8) + ce(m,11
&) * xi) * xi) * xi) * xi + (ce(m,3) + (ce(m,6) + (ce(m,9) + ce(m,1
&2) * eta) * eta) * eta) * eta + (ce(m,4) + (ce(m,7) + (ce(m,10) +
&ce(m,13) * zeta) * zeta) * zeta) * zeta
enddo
return
end

View File

@@ -0,0 +1,69 @@
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine l2norm (ldx, ldy, ldz, nx0, ny0, nz0, ist, iend, jst, j
&end, v, sum)
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! to compute the l2-norm of vector v.
!---------------------------------------------------------------------
implicit none
!---------------------------------------------------------------------
! input parameters
!---------------------------------------------------------------------
integer ldx,ldy,ldz
integer nx0,ny0,nz0
integer ist,iend
integer jst,jend
!DVM$ INHERIT v
!DVM$ DYNAMIC v
!---------------------------------------------------------------------
! To improve cache performance, second two dimensions padded by 1
! for even number sizes only. Only needed in v.
!---------------------------------------------------------------------
!DVM$ TEMPLATE, COMMON :: dvmh_temp0(1:6,0:163,0:163,-1:163)
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r1(1:6,0:163,0:163,-1:163)
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r2(1:6,0:163,0:163,-1:163)
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r3(1:6,0:163,0:163,-1:163)
!DVM$ TEMPLATE, COMMON :: dvmh_temp0_r0(1:6,0:163,0:163,-1:163)
!DVM$ DISTRIBUTE dvmh_temp0(*,BLOCK,BLOCK,BLOCK)
!DVM$ DISTRIBUTE dvmh_temp0_r1(*,BLOCK,BLOCK,*)
!DVM$ DISTRIBUTE dvmh_temp0_r2(*,BLOCK,*,BLOCK)
!DVM$ DISTRIBUTE dvmh_temp0_r3(*,*,BLOCK,BLOCK)
!DVM$ DISTRIBUTE dvmh_temp0_r0(*,*,*,BLOCK)
!DVM$ DYNAMIC dvmh_temp0, dvmh_temp0_r1, dvmh_temp0_r2, dvmh_temp0_r3,
!DVM$&dvmh_temp0_r0
double precision v(5,ldx / 2 * 2 + 1,ldy / 2 * 2 + 1,*),sum(5)
!---------------------------------------------------------------------
! local variables
!---------------------------------------------------------------------
integer i,j,k,m
do m = 1,5
sum(m) = 0.0d+00
enddo
!DVM$ region
!DVM$ PARALLEL (k,j,i,m) ON v(m,i,j,k), PRIVATE (m,j,i,k),REDUCTION (sum
!DVM$& (sum))
do k = 2,nz0 - 1
do j = jst,jend
do i = ist,iend
do m = 1,5
sum(m) = sum(m) + v(m,i,j,k) * v(m,i,j,k)
enddo
enddo
enddo
enddo
!DVM$ end region
do m = 1,5
sum(m) = sqrt (sum(m) / ((nx0 - 2) * (ny0 - 2) * (nz0 - 2)))
enddo
return
end

View File

@@ -0,0 +1,212 @@
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
!-------------------------------------------------------------------------!
! !
! N A S P A R A L L E L B E N C H M A R K S 3.3 !
! !
! S E R I A L V E R S I O N !
! !
! L U !
! !
!-------------------------------------------------------------------------!
! !
! This benchmark is a serial version of the NPB LU code. !
! Refer to NAS Technical Reports 95-020 for details. !
! !
! Permission to use, copy, distribute and modify this software !
! for any purpose with or without fee is hereby granted. We !
! request, however, that all derived work reference the NAS !
! Parallel Benchmarks 3.3. This software is provided "as is" !
! without express or implied warranty. !
! !
! Information on NPB 3.3, including the technical report, the !
! original specifications, source code, results and information !
! on how to submit new results, is available at: !
! !
! http://www.nas.nasa.gov/Software/NPB/ !
! !
! Send comments or suggestions to npb@nas.nasa.gov !
! !
! NAS Parallel Benchmarks Group !
! NASA Ames Research Center !
! Mail Stop: T27A-1 !
! Moffett Field, CA 94035-1000 !
! !
! E-mail: npb@nas.nasa.gov !
! Fax: (650) 604-3957 !
! !
!-------------------------------------------------------------------------!
!---------------------------------------------------------------------
!
! Authors: S. Weeratunga
! V. Venkatakrishnan
! E. Barszcz
! M. Yarrow
!
!---------------------------------------------------------------------
!---------------------------------------------------------------------
program applu
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!
! driver for the performance evaluation of the solver for
! five coupled parabolic/elliptic partial differential equations.
!
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! end of include file
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
implicit none
!---------------------------------------------------------------------
! end of include file
include 'applu.incl'
!---------------------------------------------------------------------
character class
logical verified
double precision mflops
double precision t,tmax,timer_read,trecs(t_last)
external timer_read
integer i,fstatus
character t_names(t_last)*8
!---------------------------------------------------------------------
! Setup info for timers
!---------------------------------------------------------------------
open (unit = 2,file = 'timer.flag',status = 'old',iostat = fstatus
&)
if (fstatus .eq. 0) then
timeron = .TRUE.
t_names(t_total) = 'total'
t_names(t_rhsx) = 'rhsx'
t_names(t_rhsy) = 'rhsy'
t_names(t_rhsz) = 'rhsz'
t_names(t_rhs) = 'rhs'
t_names(t_jacld) = 'jacld'
t_names(t_blts) = 'blts'
t_names(t_jacu) = 'jacu'
t_names(t_buts) = 'buts'
t_names(t_add) = 'add'
t_names(t_l2norm) = 'l2norm'
close (unit = 2)
else
timeron = .FALSE.
endif
!---------------------------------------------------------------------
! read input data
!---------------------------------------------------------------------
call read_input()
!---------------------------------------------------------------------
! set up domain sizes
!---------------------------------------------------------------------
call domain()
!---------------------------------------------------------------------
! set up coefficients
!---------------------------------------------------------------------
call setcoeff()
!---------------------------------------------------------------------
! set the boundary values for dependent variables
!---------------------------------------------------------------------
call setbv()
!---------------------------------------------------------------------
! set the initial values for dependent variables
!---------------------------------------------------------------------
call setiv()
!---------------------------------------------------------------------
! compute the forcing term based on prescribed exact solution
!---------------------------------------------------------------------
call erhs()
!---------------------------------------------------------------------
! perform one SSOR iteration to touch all pages
!---------------------------------------------------------------------
!DVM$ actual()
call ssor(1)
!DVM$ get_actual()
!---------------------------------------------------------------------
! reset the boundary and initial values
!---------------------------------------------------------------------
call setbv()
call setiv()
!---------------------------------------------------------------------
! perform the SSOR iterations
!--------------------------------------------------------------------
!DVM$ interval 1
!DVM$ actual()
call ssor(itmax)
!DVM$ get_actual()
!DVM$ end interval
!---------------------------------------------------------------------
! compute the solution error
!---------------------------------------------------------------------
call error()
!---------------------------------------------------------------------
! compute the surface integral
!---------------------------------------------------------------------
call pintgr()
!---------------------------------------------------------------------
! verification test
!---------------------------------------------------------------------
call verify(rsdnm,errnm,frc,class,verified)
mflops = float (itmax) * (1984.77 * float (nx0) * float (ny0) * fl
&oat (nz0) - 10923.3 * (float (nx0 + ny0 + nz0) / 3.)** 2 + 27770.9
& * float (nx0 + ny0 + nz0) / 3. - 144010.) / (maxtime * 1000000.)
call print_results('LU',class,nx0,ny0,nz0,itmax,maxtime,mflops,'
& floating point',verified,npbversion,compiletime,cs1,cs2,cs
&3,cs4,cs5,cs6,'(none)')
!---------------------------------------------------------------------
! More timers
!---------------------------------------------------------------------
if (.not.(timeron)) goto 999
do i = 1,t_last
trecs(i) = timer_read (i)
enddo
tmax = maxtime
if (tmax .eq. 0.) tmax = 1.0
write (unit = *,fmt = 800)
800 format(' SECTION Time (secs)')
do i = 1,t_last
write (unit = *,fmt = 810) t_names(i),trecs(i),trecs(i) * 100.
&/ tmax
if (i .eq. t_rhs) then
t = trecs(t_rhsx) + trecs(t_rhsy) + trecs(t_rhsz)
write (unit = *,fmt = 820) 'sub-rhs',t,t * 100. / tmax
t = trecs(i) - t
write (unit = *,fmt = 820) 'rest-rhs',t,t * 100. / tmax
endif
810 format(2x,a8,':',f9.3,' (',f6.2,'%)')
820 format(5x,'--> ',a8,':',f9.3,' (',f6.2,'%)')
enddo
999 continue
end

View File

@@ -0,0 +1,12 @@
@echo off
set CLASS=%1
set OPT=%2
CALL ..\sys\setparams LU %CLASS%
CALL %F77% %OPT% -f90 lu 1>out_%CLASS%.txt 2>err_%CLASS%.txt
if exist lu.exe (
copy lu.exe %BIN%\lu.%CLASS%.x.exe
del lu.exe
)

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,187 @@
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine pintgr ()
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! local variables
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! end of include file
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
implicit none
!---------------------------------------------------------------------
! end of include file
include 'applu.incl'
!---------------------------------------------------------------------
integer i,j,k
integer ibeg,ifin,ifin1
integer jbeg,jfin,jfin1
!DVM$ ALIGN phi1(iEX1,iEX2) WITH dvmh_temp0(*,iEX1,*,iEX2)
!DVM$ ALIGN phi2(iEX1,iEX2) WITH dvmh_temp0(*,iEX1,*,iEX2)
!DVM$ DYNAMIC phi1,phi2
double precision phi1(0:isiz2 + 1,0:isiz3 + 1),phi2(0:isiz2 + 1,0
&:isiz3 + 1)
!DVM$ SHADOW phi2( 0:1,0:1 )
!DVM$ SHADOW phi1( 0:1,0:1 )
double precision frc1,frc2,frc3
!---------------------------------------------------------------------
! set up the sub-domains for integeration in each processor
!---------------------------------------------------------------------
ibeg = ii1
ifin = ii2
jbeg = ji1
jfin = ji2
ifin1 = ifin - 1
jfin1 = jfin - 1
!---------------------------------------------------------------------
! initialize
!---------------------------------------------------------------------
!DVM$ PARALLEL (i,k) ON phi1(i,k), PRIVATE (k,i)
do i = 0,isiz2 + 1
do k = 0,isiz3 + 1
phi1(i,k) = 0.
phi2(i,k) = 0.
enddo
enddo
!DVM$ PARALLEL (j,i) ON phi1(i,j), REMOTE_ACCESS (u(:,:,:,:)),PRIVATE (k
!DVM$&,j,i)
do j = jbeg,jfin
do i = ibeg,ifin
k = ki1
phi1(i,j) = c2 * (u(5,i,j,k) - 0.50d+00 * (u(2,i,j,k)** 2 +
&u(3,i,j,k)** 2 + u(4,i,j,k)** 2) / u(1,i,j,k))
k = ki2
phi2(i,j) = c2 * (u(5,i,j,k) - 0.50d+00 * (u(2,i,j,k)** 2 +
&u(3,i,j,k)** 2 + u(4,i,j,k)** 2) / u(1,i,j,k))
enddo
enddo
frc1 = 0.0d+00
!DVM$ PARALLEL (j,i) ON phi1(i,j), PRIVATE (j,i),SHADOW_RENEW (phi1(CORN
!DVM$&ER),phi2(CORNER)),REDUCTION (sum (frc1))
do j = jbeg,jfin1
do i = ibeg,ifin1
frc1 = frc1 + (phi1(i,j) + phi1(i + 1,j) + phi1(i,j + 1) + p
&hi1(i + 1,j + 1) + phi2(i,j) + phi2(i + 1,j) + phi2(i,j + 1) + phi
&2(i + 1,j + 1))
enddo
enddo
frc1 = dxi * deta * frc1
!---------------------------------------------------------------------
! initialize
!---------------------------------------------------------------------
!DVM$ PARALLEL (i,k) ON phi1(i,k), PRIVATE (k,i)
do i = 0,isiz2 + 1
do k = 0,isiz3 + 1
phi1(i,k) = 0.
phi2(i,k) = 0.
enddo
enddo
if (jbeg .eq. ji1) then
!DVM$ PARALLEL (k,i) ON phi1(i,k), REMOTE_ACCESS (u(:,:,:,:)),PRIVATE
!DVM$& (k,i)
do k = ki1,ki2
do i = ibeg,ifin
phi1(i,k) = c2 * (u(5,i,jbeg,k) - 0.50d+00 * (u(2,i,jbeg,
&k)** 2 + u(3,i,jbeg,k)** 2 + u(4,i,jbeg,k)** 2) / u(1,i,jbeg,k))
enddo
enddo
endif
if (jfin .eq. ji2) then
!DVM$ PARALLEL (k,i) ON phi2(i,k), REMOTE_ACCESS (u(:,:,:,:)),PRIVATE
!DVM$& (k,i)
do k = ki1,ki2
do i = ibeg,ifin
phi2(i,k) = c2 * (u(5,i,jfin,k) - 0.50d+00 * (u(2,i,jfin,
&k)** 2 + u(3,i,jfin,k)** 2 + u(4,i,jfin,k)** 2) / u(1,i,jfin,k))
enddo
enddo
endif
frc2 = 0.0d+00
!DVM$ PARALLEL (k,i) ON phi1(i,k), PRIVATE (k,i),SHADOW_RENEW (phi1(CORN
!DVM$&ER),phi2(CORNER)),REDUCTION (sum (frc2))
do k = ki1,ki2 - 1
do i = ibeg,ifin1
frc2 = frc2 + (phi1(i,k) + phi1(i + 1,k) + phi1(i,k + 1) + p
&hi1(i + 1,k + 1) + phi2(i,k) + phi2(i + 1,k) + phi2(i,k + 1) + phi
&2(i + 1,k + 1))
enddo
enddo
frc2 = dxi * dzeta * frc2
!---------------------------------------------------------------------
! initialize
!---------------------------------------------------------------------
!DVM$ PARALLEL (i,k) ON phi1(i,k), PRIVATE (k,i)
do i = 0,isiz2 + 1
do k = 0,isiz3 + 1
phi1(i,k) = 0.
phi2(i,k) = 0.
enddo
enddo
if (ibeg .eq. ii1) then
!DVM$ PARALLEL (k,j) ON phi1(j,k), REMOTE_ACCESS (u(:,:,:,:)),PRIVATE
!DVM$& (k,j)
do k = ki1,ki2
do j = jbeg,jfin
phi1(j,k) = c2 * (u(5,ibeg,j,k) - 0.50d+00 * (u(2,ibeg,j,
&k)** 2 + u(3,ibeg,j,k)** 2 + u(4,ibeg,j,k)** 2) / u(1,ibeg,j,k))
enddo
enddo
endif
if (ifin .eq. ii2) then
!DVM$ PARALLEL (k,j) ON phi2(j,k), REMOTE_ACCESS (u(:,:,:,:)),PRIVATE
!DVM$& (k,j)
do k = ki1,ki2
do j = jbeg,jfin
phi2(j,k) = c2 * (u(5,ifin,j,k) - 0.50d+00 * (u(2,ifin,j,
&k)** 2 + u(3,ifin,j,k)** 2 + u(4,ifin,j,k)** 2) / u(1,ifin,j,k))
enddo
enddo
endif
frc3 = 0.0d+00
!DVM$ PARALLEL (k,j) ON phi1(j,k), PRIVATE (k,j),SHADOW_RENEW (phi1(CORN
!DVM$&ER),phi2(CORNER)),REDUCTION (sum (frc3))
do k = ki1,ki2 - 1
do j = jbeg,jfin1
frc3 = frc3 + (phi1(j,k) + phi1(j + 1,k) + phi1(j,k + 1) + p
&hi1(j + 1,k + 1) + phi2(j,k) + phi2(j + 1,k) + phi2(j,k + 1) + phi
&2(j + 1,k + 1))
enddo
enddo
frc3 = deta * dzeta * frc3
frc = 0.25d+00 * (frc1 + frc2 + frc3)
! write (*,1001) frc
return
! 1001 format (//5x,'surface integral = ',1pe12.5//)
end

View File

@@ -0,0 +1,111 @@
subroutine print_results(name, class, n1, n2, n3, niter,
> t, mops, optype, verified, npbversion,
> compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7)
implicit none
character name*(*)
character class*1
integer n1, n2, n3, niter, j
double precision t, mops
character optype*24, size*15
logical verified
character*(*) npbversion, compiletime,
> cs1, cs2, cs3, cs4, cs5, cs6, cs7
write (*, 2) name
2 format(//, ' ', A, ' Benchmark Completed.')
write (*, 3) Class
3 format(' Class = ', 12x, a12)
c If this is not a grid-based problem (EP, FT, CG), then
c we only print n1, which contains some measure of the
c problem size. In that case, n2 and n3 are both zero.
c Otherwise, we print the grid size n1xn2xn3
if ((n2 .eq. 0) .and. (n3 .eq. 0)) then
if (name(1:2) .eq. 'EP') then
write(size, '(f15.0)' ) 2.d0**n1
j = 15
if (size(j:j) .eq. '.') then
size(j:j) = ' '
j = j - 1
endif
write (*,42) size(1:j)
42 format(' Size = ',9x, a15)
else
write (*,44) n1
44 format(' Size = ',12x, i12)
endif
else
write (*, 4) n1,n2,n3
4 format(' Size = ',9x, i4,'x',i4,'x',i4)
endif
write (*, 5) niter
5 format(' Iterations = ', 12x, i12)
write (*, 6) t
6 format(' Time in seconds = ',12x, f12.2)
write (*,9) mops
9 format(' Mop/s total = ',12x, f12.2)
write(*, 11) optype
11 format(' Operation type = ', a24)
if (verified) then
write(*,12) ' SUCCESSFUL'
else
write(*,12) 'UNSUCCESSFUL'
endif
12 format(' Verification = ', 12x, a)
write(*,13) npbversion
13 format(' Version = ', 12x, a12)
write(*,14) compiletime
14 format(' Compile date = ', 12x, a12)
write (*,121) cs1
121 format(/, ' Compile options:', /,
> ' F77 = ', A)
write (*,122) cs2
122 format(' FLINK = ', A)
write (*,123) cs3
123 format(' F_LIB = ', A)
write (*,124) cs4
124 format(' F_INC = ', A)
write (*,125) cs5
125 format(' FFLAGS = ', A)
write (*,126) cs6
126 format(' FLINKFLAGS = ', A)
write(*, 127) cs7
127 format(' RAND = ', A)
write (*,130)
130 format(//' Please send all errors/feedbacks to:'//
> ' NPB Development Team'/
> ' npb@nas.nasa.gov'//)
c 130 format(//' Please send the results of this run to:'//
c > ' NPB Development Team '/
c > ' Internet: npb@nas.nasa.gov'/
c > ' '/
c > ' If email is not available, send this to:'//
c > ' MS T27A-1'/
c > ' NASA Ames Research Center'/
c > ' Moffett Field, CA 94035-1000'//
c > ' Fax: 650-604-3957'//)
return
end

View File

@@ -0,0 +1,115 @@
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine read_input ()
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! end of include file
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
implicit none
!---------------------------------------------------------------------
! end of include file
include 'applu.incl'
!---------------------------------------------------------------------
integer fstatus
!---------------------------------------------------------------------
! if input file does not exist, it uses defaults
! ipr = 1 for detailed progress output
! inorm = how often the norm is printed (once every inorm iterations)
! itmax = number of pseudo time steps
! dt = time step
! omega 1 over-relaxation factor for SSOR
! tolrsd = steady state residual tolerance levels
! nx, ny, nz = number of grid points in x, y, z directions
!---------------------------------------------------------------------
write (unit = *,fmt = 1000)
open (unit = 3,file = 'inputlu.data',status = 'old',access = 'sequ
&ential',form = 'formatted',iostat = fstatus)
if (fstatus .eq. 0) then
write (unit = *,fmt = *) 'Reading from input file inputlu.data'
read (unit = 3,fmt = *)
read (unit = 3,fmt = *)
read (unit = 3,fmt = *) ipr,inorm
read (unit = 3,fmt = *)
read (unit = 3,fmt = *)
read (unit = 3,fmt = *) itmax
read (unit = 3,fmt = *)
read (unit = 3,fmt = *)
read (unit = 3,fmt = *) dt
read (unit = 3,fmt = *)
read (unit = 3,fmt = *)
read (unit = 3,fmt = *) omega
read (unit = 3,fmt = *)
read (unit = 3,fmt = *)
read (unit = 3,fmt = *) tolrsd(1),tolrsd(2),tolrsd(3),tolrsd(4)
&,tolrsd(5)
read (unit = 3,fmt = *)
read (unit = 3,fmt = *)
read (unit = 3,fmt = *) nx0,ny0,nz0
close (unit = 3)
else
ipr = ipr_default
inorm = inorm_default
itmax = itmax_default
dt = dt_default
omega = omega_default
tolrsd(1) = tolrsd1_def
tolrsd(2) = tolrsd2_def
tolrsd(3) = tolrsd3_def
tolrsd(4) = tolrsd4_def
tolrsd(5) = tolrsd5_def
nx0 = isiz1
ny0 = isiz2
nz0 = isiz3
endif
!---------------------------------------------------------------------
! check problem size
!---------------------------------------------------------------------
if (nx0 .lt. 4 .or. ny0 .lt. 4 .or. nz0 .lt. 4) then
write (unit = *,fmt = 2001)
2001 format (5x,'PROBLEM SIZE IS TOO SMALL - ', /
&5x,'SET EACH OF NX, NY AND NZ AT LEAST EQUAL TO 5')
stop
endif
if (nx0 .gt. isiz1 .or. ny0 .gt. isiz2 .or. nz0 .gt. isiz3) then
write (unit = *,fmt = 2002)
2002 format (5x,'PROBLEM SIZE IS TOO LARGE - ', /
&5x,'NX, NY AND NZ SHOULD BE EQUAL TO ', /5x,'ISIZ1, ISIZ
&2 AND ISIZ3 RESPECTIVELY')
stop
endif
write (unit = *,fmt = 1001) nx0,ny0,nz0
write (unit = *,fmt = 1002) itmax
write (unit = *,fmt = *)
1000 format(//,' NAS Parallel Benchmarks (NPB3.3-SER)', ' - LU
& Benchmark', /)
1001 format(' Size: ', i4, 'x', i4, 'x', i4)
1002 format(' Iterations: ', i4)
return
end

View File

@@ -0,0 +1,420 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine rhs
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c compute the right hand sides
c---------------------------------------------------------------------
implicit none
include 'applu.incl'
c---------------------------------------------------------------------
c local variables
c---------------------------------------------------------------------
integer i, j, k, m, p
double precision q
double precision tmp, utmp(6,isiz3), rtmp(5,isiz3)
double precision u21, u31, u41
double precision u21i, u31i, u41i, u51i
double precision u21j, u31j, u41j, u51j
double precision u21k, u31k, u41k, u51k
double precision u21im1, u31im1, u41im1, u51im1
double precision u21jm1, u31jm1, u41jm1, u51jm1
double precision u21km1, u31km1, u41km1, u51km1
double precision flu(5,-1:1)
if (timeron) call timer_start(t_rhs)
!DVM$ region
!DVM$ PARALLEL (k,j,i) ON qs(i,j,k), PRIVATE (j,i,tmp,k,m)
!DVM$&,SHADOW_COMPUTE,SHADOW_RENEW (u)
do k = 1, nz
do j = 1, ny
do i = 1, nx
do m = 1, 5
rsd(m,i,j,k) = - frct(m,i,j,k)
end do
tmp = 1.0d+00 / u(1,i,j,k)
qs(i,j,k) = 0.50d+00 * ( u(2,i,j,k) * u(2,i,j,k)
> + u(3,i,j,k) * u(3,i,j,k)
> + u(4,i,j,k) * u(4,i,j,k) )
> * tmp
end do
end do
end do
! if (timeron) call timer_start(t_rhsx)
!DVM$ PARALLEL (k,j,i) on rsd(*,i,j,k),
!DVM$&PRIVATE(p, u21, q, m, tmp, u21i, u31i, u41i, u51i, u21im1,
!DVM$&u31im1, u41im1, u51im1, u31, u21j, u31j, u41j, u51j, u21jm1,
!DVM$&u41jm1, u51jm1, u41, u21k, u31k, u41k, u51k, u21km1, u31km1,
!DVM$&u51km1, u31jm1,u41km1,flu), cuda_block (32,4)
do k = 2, nz - 1
do j = jst, jend
do i = ist, iend
do p = -1, 1, 2
flu(1,p) = u(2,i+p,j,k)
u21 = u(2,i+p,j,k) / u(1,i+p,j,k)
q = qs(i+p,j,k)
flu(2,p) = u(2,i+p,j,k) * u21 + c2 *
> ( u(5,i+p,j,k) - q )
flu(3,p) = u(3,i+p,j,k) * u21
flu(4,p) = u(4,i+p,j,k) * u21
flu(5,p) = ( c1 * u(5,i+p,j,k) - c2 * q ) * u21
end do
do m = 1, 5
rsd(m,i,j,k) = rsd(m,i,j,k)
> - tx2 * ( flu(m,1) - flu(m,-1) )
end do
do p = 0, 1
tmp = 1.0d+00/ u(1,i+p,j,k)
u21i = tmp * u(2,i+p,j,k)
u31i = tmp * u(3,i+p,j,k)
u41i = tmp * u(4,i+p,j,k)
u51i = tmp * u(5,i+p,j,k)
tmp = 1.0d+00/ u(1,i-1+p,j,k)
u21im1 = tmp * u(2,i-1+p,j,k)
u31im1 = tmp * u(3,i-1+p,j,k)
u41im1 = tmp * u(4,i-1+p,j,k)
u51im1 = tmp * u(5,i-1+p,j,k)
flu(2,p) = (4.0d+00/3.0d+00) * tx3 * (u21i-u21im1)
flu(3,p) = tx3 * ( u31i - u31im1 )
flu(4,p) = tx3 * ( u41i - u41im1 )
flu(5,p) = 0.50d+00 * ( 1.0d+00 - c1*c5)
> * tx3 * ( ( u21i **2 + u31i **2+u41i **2)
> - ( u21im1**2 + u31im1**2+u41im1**2))
> + (1.0d+00/6.0d+00)
> * tx3 * ( u21i**2 - u21im1**2 )
> + c1 * c5 * tx3 * ( u51i - u51im1 )
enddo
rsd(1,i,j,k) = rsd(1,i,j,k)
> + dx1 * tx1 * ( u(1,i-1,j,k)
> - 2.0d+00 * u(1,i,j,k)
> + u(1,i+1,j,k) )
rsd(2,i,j,k) = rsd(2,i,j,k)
> + tx3 * c3 * c4 * ( flu(2,1) - flu(2,0) )
> + dx2 * tx1 * ( u(2,i-1,j,k)
> - 2.0d+00 * u(2,i,j,k)
> + u(2,i+1,j,k) )
rsd(3,i,j,k) = rsd(3,i,j,k)
> + tx3 * c3 * c4 * ( flu(3,1) - flu(3,0) )
> + dx3 * tx1 * ( u(3,i-1,j,k)
> - 2.0d+00 * u(3,i,j,k)
> + u(3,i+1,j,k) )
rsd(4,i,j,k) = rsd(4,i,j,k)
> + tx3 * c3 * c4 * ( flu(4,1) - flu(4,0) )
> + dx4 * tx1 * ( u(4,i-1,j,k)
> - 2.0d+00 * u(4,i,j,k)
> + u(4,i+1,j,k) )
rsd(5,i,j,k) = rsd(5,i,j,k)
> + tx3 * c3 * c4 * ( flu(5,1) - flu(5,0) )
> + dx5 * tx1 * ( u(5,i-1,j,k)
> - 2.0d+00 * u(5,i,j,k)
> + u(5,i+1,j,k) )
if (i .eq. 2)then
do m = 1, 5
rsd(m,2,j,k) = rsd(m,2,j,k)
> - dssp * ( + 5.0d+00 * u(m,2,j,k)
> - 4.0d+00 * u(m,3,j,k)
> + u(m,4,j,k) )
enddo
else if (i .eq. 3)then
do m = 1, 5
rsd(m,3,j,k) = rsd(m,3,j,k)
> - dssp * ( - 4.0d+00 * u(m,2,j,k)
> + 6.0d+00 * u(m,3,j,k)
> - 4.0d+00 * u(m,4,j,k)
> + u(m,5,j,k) )
enddo
else if (i .eq. nx-2)then
do m = 1, 5
rsd(m,nx-2,j,k) = rsd(m,nx-2,j,k)
> - dssp * ( u(m,nx-4,j,k)
> - 4.0d+00 * u(m,nx-3,j,k)
> + 6.0d+00 * u(m,nx-2,j,k)
> - 4.0d+00 * u(m,nx-1,j,k) )
enddo
else if (i .eq. nx-1)then
do m = 1, 5
rsd(m,nx-1,j,k) = rsd(m,nx-1,j,k)
> - dssp * ( u(m,nx-3,j,k)
> - 4.0d+00 * u(m,nx-2,j,k)
> + 5.0d+00 * u(m,nx-1,j,k) )
enddo
else
do m = 1, 5
rsd(m,i,j,k) = rsd(m,i,j,k)
> - dssp * ( u(m,i-2,j,k)
> - 4.0d+00 * u(m,i-1,j,k)
> + 6.0d+00 * u(m,i,j,k)
> - 4.0d+00 * u(m,i+1,j,k)
> + u(m,i+2,j,k) )
end do
endif
! end do
! end do
! end do
! if (timeron) call timer_stop(t_rhsx)
! if (timeron) call timer_start(t_rhsy)
! do k = 2, nz - 1
! do j = jst, jend
! do i = ist, iend
do p = -1, 1, 2
flu(1,p) = u(3,i,j+p,k)
u31 = u(3,i,j+p,k) / u(1,i,j+p,k)
q = qs(i,j+p,k)
flu(2,p) = u(2,i,j+p,k) * u31
flu(3,p) = u(3,i,j+p,k) * u31 + c2 * (u(5,i,j+p,k)-q)
flu(4,p) = u(4,i,j+p,k) * u31
flu(5,p) = ( c1 * u(5,i,j+p,k) - c2 * q ) * u31
end do
do m = 1, 5
rsd(m,i,j,k) = rsd(m,i,j,k)
> - ty2 * ( flu(m,1) - flu(m,-1) )
end do
do p = 0, 1
tmp = 1.0d+00/ u(1,i,j+p,k)
u21j = tmp * u(2,i,j+p,k)
u31j = tmp * u(3,i,j+p,k)
u41j = tmp * u(4,i,j+p,k)
u51j = tmp * u(5,i,j+p,k)
tmp = 1.0d+00/ u(1,i,j-1+p,k)
u21jm1 = tmp * u(2,i,j-1+p,k)
u31jm1 = tmp * u(3,i,j-1+p,k)
u41jm1 = tmp * u(4,i,j-1+p,k)
u51jm1 = tmp * u(5,i,j-1+p,k)
flu(2,p) = ty3 * ( u21j - u21jm1 )
flu(3,p) = (4.0d+00/3.0d+00) * ty3 * (u31j-u31jm1)
flu(4,p) = ty3 * ( u41j - u41jm1 )
flu(5,p) = 0.50d+00 * ( 1.0d+00 - c1*c5 )
> * ty3 * ( ( u21j **2 + u31j **2 + u41j **2 )
> - ( u21jm1**2 + u31jm1**2 + u41jm1**2 ) )
> + (1.0d+00/6.0d+00)
> * ty3 * ( u31j**2 - u31jm1**2 )
> + c1 * c5 * ty3 * ( u51j - u51jm1 )
enddo
rsd(1,i,j,k) = rsd(1,i,j,k)
> + dy1 * ty1 * ( u(1,i,j-1,k)
> - 2.0d+00 * u(1,i,j,k)
> + u(1,i,j+1,k) )
rsd(2,i,j,k) = rsd(2,i,j,k)
> + ty3 * c3 * c4 * ( flu(2,1) - flu(2,0) )
> + dy2 * ty1 * ( u(2,i,j-1,k)
> - 2.0d+00 * u(2,i,j,k)
> + u(2,i,j+1,k) )
rsd(3,i,j,k) = rsd(3,i,j,k)
> + ty3 * c3 * c4 * ( flu(3,1) - flu(3,0) )
> + dy3 * ty1 * ( u(3,i,j-1,k)
> - 2.0d+00 * u(3,i,j,k)
> + u(3,i,j+1,k) )
rsd(4,i,j,k) = rsd(4,i,j,k)
> + ty3 * c3 * c4 * ( flu(4,1) - flu(4,0) )
> + dy4 * ty1 * ( u(4,i,j-1,k)
> - 2.0d+00 * u(4,i,j,k)
> + u(4,i,j+1,k) )
rsd(5,i,j,k) = rsd(5,i,j,k)
> + ty3 * c3 * c4 * ( flu(5,1) - flu(5,0) )
> + dy5 * ty1 * ( u(5,i,j-1,k)
> - 2.0d+00 * u(5,i,j,k)
> + u(5,i,j+1,k) )
if (j .eq. 2) then
do m = 1, 5
rsd(m,i,2,k) = rsd(m,i,2,k)
> - dssp * ( + 5.0d+00 * u(m,i,2,k)
> - 4.0d+00 * u(m,i,3,k)
> + u(m,i,4,k) )
enddo
elseif (j .eq. 3) then
do m = 1, 5
rsd(m,i,3,k) = rsd(m,i,3,k)
> - dssp * ( - 4.0d+00 * u(m,i,2,k)
> + 6.0d+00 * u(m,i,3,k)
> - 4.0d+00 * u(m,i,4,k)
> + u(m,i,5,k) )
end do
elseif (j .eq. ny-2) then
do m = 1, 5
rsd(m,i,ny-2,k) = rsd(m,i,ny-2,k)
> - dssp * ( u(m,i,ny-4,k)
> - 4.0d+00 * u(m,i,ny-3,k)
> + 6.0d+00 * u(m,i,ny-2,k)
> - 4.0d+00 * u(m,i,ny-1,k) )
enddo
elseif (j .eq. ny-1) then
do m = 1, 5
rsd(m,i,ny-1,k) = rsd(m,i,ny-1,k)
> - dssp * ( u(m,i,ny-3,k)
> - 4.0d+00 * u(m,i,ny-2,k)
> + 5.0d+00 * u(m,i,ny-1,k) )
end do
else
do m = 1, 5
rsd(m,i,j,k) = rsd(m,i,j,k)
> - dssp * ( u(m,i,j-2,k)
> - 4.0d+00 * u(m,i,j-1,k)
> + 6.0d+00 * u(m,i,j,k)
> - 4.0d+00 * u(m,i,j+1,k)
> + u(m,i,j+2,k) )
end do
endif
! end do
! end do
! end do
! if (timeron) call timer_stop(t_rhsy)
! if (timeron) call timer_start(t_rhsz)
! do k = 2, nz - 1
! do j = jst, jend
! do i = ist, iend
do p=-1,1,2
flu(1,p) = u(4,i,j,k+p)
u41 = u(4,i,j,k+p) / u(1,i,j,k+p)
q = qs(i,j,k+p)
flu(2,p) = u(2,i,j,k+p) * u41
flu(3,p) = u(3,i,j,k+p) * u41
flu(4,p) = u(4,i,j,k+p) * u41 + c2 * (u(5,i,j,k+p)-q)
flu(5,p) = ( c1 * u(5,i,j,k+p) - c2 * q ) * u41
enddo
do m = 1, 5
rsd(m,i,j,k) = rsd(m,i,j,k)
> - tz2 * ( flu(m,1) - flu(m,-1) )
end do
do p=0,1
tmp = 1.0d+00/ u(1,i,j,k+p)
u21k = tmp * u(2,i,j,k+p)
u31k = tmp * u(3,i,j,k+p)
u41k = tmp * u(4,i,j,k+p)
u51k = tmp * u(5,i,j,k+p)
tmp = 1.0d+00/ u(1,i,j,k-1+p)
u21km1 = tmp * u(2,i,j,k-1+p)
u31km1 = tmp * u(3,i,j,k-1+p)
u41km1 = tmp * u(4,i,j,k-1+p)
u51km1 = tmp * u(5,i,j,k-1+p)
flu(2,p) = tz3 * ( u21k - u21km1 )
flu(3,p) = tz3 * ( u31k - u31km1 )
flu(4,p) = (4.0d+00/3.0d+00) * tz3 * (u41k-u41km1)
flu(5,p) = 0.50d+00 * ( 1.0d+00 - c1*c5 )
> * tz3 * ( ( u21k **2 + u31k **2 + u41k **2 )
> - ( u21km1**2 + u31km1**2 + u41km1**2 ) )
> + (1.0d+00/6.0d+00)
> * tz3 * ( u41k**2 - u41km1**2 )
> + c1 * c5 * tz3 * ( u51k - u51km1 )
enddo
rsd(1,i,j,k) = rsd(1,i,j,k)
> + dz1 * tz1 * ( u(1,i,j,k-1)
> - 2.0d+00 * u(1,i,j,k)
> + u(1,i,j,k+1) )
rsd(2,i,j,k) = rsd(2,i,j,k)
> + tz3 * c3 * c4 * ( flu(2,1) - flu(2,0) )
> + dz2 * tz1 * ( u(2,i,j,k-1)
> - 2.0d+00 * u(2,i,j,k)
> + u(2,i,j,k+1) )
rsd(3,i,j,k) = rsd(3,i,j,k)
> + tz3 * c3 * c4 * ( flu(3,1) - flu(3,0) )
> + dz3 * tz1 * ( u(3,i,j,k-1)
> - 2.0d+00 * u(3,i,j,k)
> + u(3,i,j,k+1) )
rsd(4,i,j,k) = rsd(4,i,j,k)
> + tz3 * c3 * c4 * ( flu(4,1) - flu(4,0) )
> + dz4 * tz1 * ( u(4,i,j,k-1)
> - 2.0d+00 * u(4,i,j,k)
> + u(4,i,j,k+1) )
rsd(5,i,j,k) = rsd(5,i,j,k)
> + tz3 * c3 * c4 * ( flu(5,1) - flu(5,0) )
> + dz5 * tz1 * ( u(5,i,j,k-1)
> - 2.0d+00 * u(5,i,j,k)
> + u(5,i,j,k+1) )
if (k .eq. 2) then
do m = 1, 5
rsd(m,i,j,2) = rsd(m,i,j,2)
> - dssp * ( + 5.0d+00 * u(m,i,j,2)
> - 4.0d+00 * u(m,i,j,3)
> + u(m,i,j,4) )
end do
elseif (k .eq. 3) then
do m = 1, 5
rsd(m,i,j,3) = rsd(m,i,j,3)
> - dssp * ( - 4.0d+00 * u(m,i,j,2)
> + 6.0d+00 * u(m,i,j,3)
> - 4.0d+00 * u(m,i,j,4)
> + u(m,i,j,5) )
end do
elseif (k .eq. nz-2) then
do m = 1, 5
rsd(m,i,j,nz-2) = rsd(m,i,j,nz-2)
> - dssp * ( u(m,i,j,nz-4)
> - 4.0d+00 * u(m,i,j,nz-3)
> + 6.0d+00 * u(m,i,j,nz-2)
> - 4.0d+00 * u(m,i,j,nz-1) )
end do
elseif (k .eq. nz-1) then
do m = 1, 5
rsd(m,i,j,nz-1) = rsd(m,i,j,nz-1)
> - dssp * ( u(m,i,j,nz-3)
> - 4.0d+00 * u(m,i,j,nz-2)
> + 5.0d+00 * u(m,i,j,nz-1) )
end do
else
do m = 1, 5
rsd(m,i,j,k) = rsd(m,i,j,k)
> - dssp * ( u(m,i,j,k-2)
> - 4.0d+00 * u(m,i,j,k-1)
> + 6.0d+00 * u(m,i,j,k)
> - 4.0d+00 * u(m,i,j,k+1)
> + u(m,i,j,k+2) )
end do
endif
end do
end do
end do
!DVM$ end region
! if (timeron) call timer_stop(t_rhsz)
if (timeron) call timer_stop(t_rhs)
return
end

View File

@@ -0,0 +1,536 @@
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine rhs ()
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! compute the right hand sides
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! local variables
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! end of include file
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
implicit none
!---------------------------------------------------------------------
! end of include file
include 'applu.incl'
!---------------------------------------------------------------------
integer i,j,k,m
!DVM$ ALIGN flux_br3(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX3,iEX
!DVM$&4,iEX2)
!DVM$ DYNAMIC flux_br3
double precision ,allocatable:: flux_br3(:,:,:,:)
!DVM$ SHADOW flux_br3( 0:0,1:1,0:0,0:0 )
!DVM$ ALIGN flux_br2(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX3,iEX
!DVM$&2,iEX4)
!DVM$ DYNAMIC flux_br2
double precision ,allocatable:: flux_br2(:,:,:,:)
!DVM$ SHADOW flux_br2( 0:0,1:1,0:0,0:0 )
!DVM$ ALIGN flux_br1(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX2
!DVM$&,iEX3,iEX4)
!DVM$ DYNAMIC flux_br1
double precision ,allocatable:: flux_br1(:,:,:,:)
!DVM$ SHADOW flux_br1( 0:0,1:1,0:0,0:0 )
double precision q
double precision tmp,utmp(6,isiz3),rtmp(5,isiz3)
!DVM$ ALIGN rtmp_br1(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX3,iEX
!DVM$&4,iEX2)
!DVM$ DYNAMIC rtmp_br1
double precision ,allocatable:: rtmp_br1(:,:,:,:)
!DVM$ ALIGN utmp_br1(iEX1,iEX2,iEX3,iEX4) WITH dvmh_temp0(iEX1,iEX3,iEX
!DVM$&4,iEX2)
!DVM$ DYNAMIC utmp_br1
double precision ,allocatable:: utmp_br1(:,:,:,:)
!DVM$ SHADOW utmp_br1( 0:0,2:2,0:0,0:0 )
double precision u21,u31,u41
double precision u21i,u31i,u41i,u51i
double precision u21j,u31j,u41j,u51j
double precision u21k,u31k,u41k,u51k
double precision u21im1,u31im1,u41im1,u51im1
double precision u21jm1,u31jm1,u41jm1,u51jm1
double precision u21km1,u31km1,u41km1,u51km1
!DVM$ interval 11
if (timeron) call timer_start(t_rhs)
!DVM$ region
!DVM$ PARALLEL (k,j,i) ON qs(i,j,k), PRIVATE (j,i,tmp,k,m)
!DVM$&,SHADOW_COMPUTE,SHADOW_RENEW (u)
do k = 1,nz
do j = 1,ny
do i = 1,nx
tmp = 1.0d+00 / u(1,i,j,k)
qs(i,j,k) = 0.50d+00 * (u(2,i,j,k) * u(2,i,j,k) + u(3,i,j
&,k) * u(3,i,j,k) + u(4,i,j,k) * u(4,i,j,k)) * tmp
tmp = 1.0d+00 / u(1,i,j,k)
rho_i(i,j,k) = tmp
do m = 1,5
rsd(m,i,j,k) = (-(frct(m,i,j,k)))
enddo
enddo
enddo
enddo
!DVM$ end region
allocate(flux_br1(5,isiz1,2:isiz2 - 1,2:isiz3 - 1))
!---------------------------------------------------------------------
! xi-direction flux differences
!---------------------------------------------------------------------
!DVM$ region
!DVM$ PARALLEL (k,j,i) ON flux_br1(*,i,j,k), PRIVATE (j,i,k,q,u21),
!DVM$&SHADOW_COMPUTE
do k = 2,nz - 1
do j = jst,jend
do i = 1,nx
flux_br1(1,i,j,k) = u(2,i,j,k)
u21 = u(2,i,j,k) * rho_i(i,j,k)
q = qs(i,j,k)
flux_br1(2,i,j,k) = u(2,i,j,k) * u21 + c2 * (u(5,i,j,k) -
& q)
flux_br1(3,i,j,k) = u(3,i,j,k) * u21
flux_br1(4,i,j,k) = u(4,i,j,k) * u21
flux_br1(5,i,j,k) = (c1 * u(5,i,j,k) - c2 * q) * u21
enddo
enddo
enddo
!DVM$ PARALLEL (k,j,i,m) ON rsd(m,i,j,k), PRIVATE (j,i,k,m)
do k = 2,nz - 1
do j = jst,jend
do i = ist,iend
do m = 1,5
rsd(m,i,j,k) = rsd(m,i,j,k) - tx2 * (flux_br1(m,i + 1,
&j,k) - flux_br1(m,i - 1,j,k))
enddo
enddo
enddo
enddo
!DVM$ PARALLEL (k,j,i) ON flux_br1(*,i,j,k), PRIVATE (j,i,tmp,k,u21i,u31
!DVM$&i,u41im1,u51im1,u51i,u41i,u21im1,u31im1),SHADOW_COMPUTE
do k = 2,nz - 1
do j = jst,jend
do i = ist,nx
tmp = rho_i(i,j,k)
u21i = tmp * u(2,i,j,k)
u31i = tmp * u(3,i,j,k)
u41i = tmp * u(4,i,j,k)
u51i = tmp * u(5,i,j,k)
tmp = rho_i(i - 1,j,k)
u21im1 = tmp * u(2,i - 1,j,k)
u31im1 = tmp * u(3,i - 1,j,k)
u41im1 = tmp * u(4,i - 1,j,k)
u51im1 = tmp * u(5,i - 1,j,k)
flux_br1(2,i,j,k) = 4.0d+00 / 3.0d+00 * tx3 * (u21i - u21
&im1)
flux_br1(3,i,j,k) = tx3 * (u31i - u31im1)
flux_br1(4,i,j,k) = tx3 * (u41i - u41im1)
flux_br1(5,i,j,k) = 0.50d+00 * (1.0d+00 - c1 * c5) * tx3
&* (u21i** 2 + u31i** 2 + u41i** 2 - (u21im1** 2 + u31im1** 2 + u41
&im1** 2)) + 1.0d+00 / 6.0d+00 * tx3 * (u21i** 2 - u21im1** 2) + c1
& * c5 * tx3 * (u51i - u51im1)
enddo
enddo
enddo
!DVM$ PARALLEL (k,j,i) ON rsd(*,i,j,k), PRIVATE (j,i,k)
do k = 2,nz - 1
do j = jst,jend
do i = ist,iend
rsd(1,i,j,k) = rsd(1,i,j,k) + dx1 * tx1 * (u(1,i - 1,j,k)
& - 2.0d+00 * u(1,i,j,k) + u(1,i + 1,j,k))
rsd(2,i,j,k) = rsd(2,i,j,k) + tx3 * c3 * c4 * (flux_br1(2
&,i + 1,j,k) - flux_br1(2,i,j,k)) + dx2 * tx1 * (u(2,i - 1,j,k) - 2
&.0d+00 * u(2,i,j,k) + u(2,i + 1,j,k))
rsd(3,i,j,k) = rsd(3,i,j,k) + tx3 * c3 * c4 * (flux_br1(3
&,i + 1,j,k) - flux_br1(3,i,j,k)) + dx3 * tx1 * (u(3,i - 1,j,k) - 2
&.0d+00 * u(3,i,j,k) + u(3,i + 1,j,k))
rsd(4,i,j,k) = rsd(4,i,j,k) + tx3 * c3 * c4 * (flux_br1(4
&,i + 1,j,k) - flux_br1(4,i,j,k)) + dx4 * tx1 * (u(4,i - 1,j,k) - 2
&.0d+00 * u(4,i,j,k) + u(4,i + 1,j,k))
rsd(5,i,j,k) = rsd(5,i,j,k) + tx3 * c3 * c4 * (flux_br1(5
&,i + 1,j,k) - flux_br1(5,i,j,k)) + dx5 * tx1 * (u(5,i - 1,j,k) - 2
&.0d+00 * u(5,i,j,k) + u(5,i + 1,j,k))
enddo
enddo
enddo
!---------------------------------------------------------------------
! Fourth-order dissipation
!---------------------------------------------------------------------
!DVM$ PARALLEL (k,j,m) ON rsd(m,2,j,k), PRIVATE (j,k,m)
do k = 2,nz - 1
do j = jst,jend
do m = 1,5
rsd(m,2,j,k) = rsd(m,2,j,k) - dssp * ((+(5.0d+00)) * u(m,
&2,j,k) - 4.0d+00 * u(m,3,j,k) + u(m,4,j,k))
enddo
enddo
enddo
!DVM$ PARALLEL (k,j,m) ON rsd(m,3,j,k), PRIVATE (j,k,m)
do k = 2,nz - 1
do j = jst,jend
do m = 1,5
rsd(m,3,j,k) = rsd(m,3,j,k) - dssp * ((-(4.0d+00)) * u(m,
&2,j,k) + 6.0d+00 * u(m,3,j,k) - 4.0d+00 * u(m,4,j,k) + u(m,5,j,k))
enddo
enddo
enddo
!DVM$ PARALLEL (k,j,i,m) ON rsd(m,i,j,k), PRIVATE (j,i,k,m)
do k = 2,nz - 1
do j = jst,jend
do i = 4,nx - 3
do m = 1,5
rsd(m,i,j,k) = rsd(m,i,j,k) - dssp * (u(m,i - 2,j,k) -
& 4.0d+00 * u(m,i - 1,j,k) + 6.0d+00 * u(m,i,j,k) - 4.0d+00 * u(m,i
& + 1,j,k) + u(m,i + 2,j,k))
enddo
enddo
enddo
enddo
!DVM$ PARALLEL (k,j,m) ON rsd(m,nx + -2,j,k), PRIVATE (j,k,m)
do k = 2,nz - 1
do j = jst,jend
do m = 1,5
rsd(m,nx - 2,j,k) = rsd(m,nx - 2,j,k) - dssp * (u(m,nx -
&4,j,k) - 4.0d+00 * u(m,nx - 3,j,k) + 6.0d+00 * u(m,nx - 2,j,k) - 4
&.0d+00 * u(m,nx - 1,j,k))
enddo
enddo
enddo
!DVM$ PARALLEL (k,j,m) ON rsd(m,nx + -1,j,k), PRIVATE (j,k,m)
do k = 2,nz - 1
do j = jst,jend
do m = 1,5
rsd(m,nx - 1,j,k) = rsd(m,nx - 1,j,k) - dssp * (u(m,nx -
&3,j,k) - 4.0d+00 * u(m,nx - 2,j,k) + 5.0d+00 * u(m,nx - 1,j,k))
enddo
enddo
enddo
!DVM$ end region
deallocate(flux_br1)
allocate(flux_br2(5,isiz1,2:isiz1 - 1,2:isiz3 - 1))
!---------------------------------------------------------------------
! eta-direction flux differences
!---------------------------------------------------------------------
!DVM$ region
!DVM$ PARALLEL (k,i,j) ON flux_br2(*,j,i,k), PRIVATE (j,i,k,q,u31),
!DVM$& SHADOW_COMPUTE
do k = 2,nz - 1
do i = ist,iend
do j = 1,ny
flux_br2(1,j,i,k) = u(3,i,j,k)
u31 = u(3,i,j,k) * rho_i(i,j,k)
q = qs(i,j,k)
flux_br2(2,j,i,k) = u(2,i,j,k) * u31
flux_br2(3,j,i,k) = u(3,i,j,k) * u31 + c2 * (u(5,i,j,k) -
& q)
flux_br2(4,j,i,k) = u(4,i,j,k) * u31
flux_br2(5,j,i,k) = (c1 * u(5,i,j,k) - c2 * q) * u31
enddo
enddo
enddo
!DVM$ PARALLEL (k,i,j,m) ON rsd(m,i,j,k), PRIVATE (j,i,k,m)
do k = 2,nz - 1
do i = ist,iend
do j = jst,jend
do m = 1,5
rsd(m,i,j,k) = rsd(m,i,j,k) - ty2 * (flux_br2(m,j + 1,
&i,k) - flux_br2(m,j - 1,i,k))
enddo
enddo
enddo
enddo
!DVM$ PARALLEL (k,i,j) ON flux_br2(*,j,i,k), PRIVATE (u51j,j,u31j,u41j,u
!DVM$&51jm1,u21j,i,tmp,u41jm1,u21jm1,u31jm1,k),SHADOW_COMPUTE
do k = 2,nz - 1
do i = ist,iend
do j = jst,ny
tmp = rho_i(i,j,k)
u21j = tmp * u(2,i,j,k)
u31j = tmp * u(3,i,j,k)
u41j = tmp * u(4,i,j,k)
u51j = tmp * u(5,i,j,k)
tmp = rho_i(i,j - 1,k)
u21jm1 = tmp * u(2,i,j - 1,k)
u31jm1 = tmp * u(3,i,j - 1,k)
u41jm1 = tmp * u(4,i,j - 1,k)
u51jm1 = tmp * u(5,i,j - 1,k)
flux_br2(2,j,i,k) = ty3 * (u21j - u21jm1)
flux_br2(3,j,i,k) = 4.0d+00 / 3.0d+00 * ty3 * (u31j - u31
&jm1)
flux_br2(4,j,i,k) = ty3 * (u41j - u41jm1)
flux_br2(5,j,i,k) = 0.50d+00 * (1.0d+00 - c1 * c5) * ty3
&* (u21j** 2 + u31j** 2 + u41j** 2 - (u21jm1** 2 + u31jm1** 2 + u41
&jm1** 2)) + 1.0d+00 / 6.0d+00 * ty3 * (u31j** 2 - u31jm1** 2) + c1
& * c5 * ty3 * (u51j - u51jm1)
enddo
enddo
enddo
!DVM$ PARALLEL (k,i,j) ON rsd(*,i,j,k), PRIVATE (j,i,k)
do k = 2,nz - 1
do i = ist,iend
do j = jst,jend
rsd(1,i,j,k) = rsd(1,i,j,k) + dy1 * ty1 * (u(1,i,j - 1,k)
& - 2.0d+00 * u(1,i,j,k) + u(1,i,j + 1,k))
rsd(2,i,j,k) = rsd(2,i,j,k) + ty3 * c3 * c4 * (flux_br2(2
&,j + 1,i,k) - flux_br2(2,j,i,k)) + dy2 * ty1 * (u(2,i,j - 1,k) - 2
&.0d+00 * u(2,i,j,k) + u(2,i,j + 1,k))
rsd(3,i,j,k) = rsd(3,i,j,k) + ty3 * c3 * c4 * (flux_br2(3
&,j + 1,i,k) - flux_br2(3,j,i,k)) + dy3 * ty1 * (u(3,i,j - 1,k) - 2
&.0d+00 * u(3,i,j,k) + u(3,i,j + 1,k))
rsd(4,i,j,k) = rsd(4,i,j,k) + ty3 * c3 * c4 * (flux_br2(4
&,j + 1,i,k) - flux_br2(4,j,i,k)) + dy4 * ty1 * (u(4,i,j - 1,k) - 2
&.0d+00 * u(4,i,j,k) + u(4,i,j + 1,k))
rsd(5,i,j,k) = rsd(5,i,j,k) + ty3 * c3 * c4 * (flux_br2(5
&,j + 1,i,k) - flux_br2(5,j,i,k)) + dy5 * ty1 * (u(5,i,j - 1,k) - 2
&.0d+00 * u(5,i,j,k) + u(5,i,j + 1,k))
enddo
enddo
enddo
!DVM$ PARALLEL (k,i,m) ON rsd(m,i,2,k), PRIVATE (i,k,m)
do k = 2,nz - 1
do i = ist,iend
do m = 1,5
rsd(m,i,2,k) = rsd(m,i,2,k) - dssp * ((+(5.0d+00)) * u(m,
&i,2,k) - 4.0d+00 * u(m,i,3,k) + u(m,i,4,k))
enddo
enddo
enddo
!DVM$ PARALLEL (k,i,m) ON rsd(m,i,3,k), PRIVATE (i,k,m)
do k = 2,nz - 1
do i = ist,iend
do m = 1,5
rsd(m,i,3,k) = rsd(m,i,3,k) - dssp * ((-(4.0d+00)) * u(m,
&i,2,k) + 6.0d+00 * u(m,i,3,k) - 4.0d+00 * u(m,i,4,k) + u(m,i,5,k))
enddo
enddo
enddo
!DVM$ PARALLEL (k,i,j,m) ON rsd(m,i,j,k), PRIVATE (j,i,k,m)
do k = 2,nz - 1
do i = ist,iend
do j = 4,ny - 3
do m = 1,5
rsd(m,i,j,k) = rsd(m,i,j,k) - dssp * (u(m,i,j - 2,k) -
& 4.0d+00 * u(m,i,j - 1,k) + 6.0d+00 * u(m,i,j,k) - 4.0d+00 * u(m,i
&,j + 1,k) + u(m,i,j + 2,k))
enddo
enddo
enddo
enddo
!DVM$ PARALLEL (k,i,m) ON rsd(m,i,ny + -2,k), PRIVATE (i,k,m)
do k = 2,nz - 1
do i = ist,iend
do m = 1,5
rsd(m,i,ny - 2,k) = rsd(m,i,ny - 2,k) - dssp * (u(m,i,ny
&- 4,k) - 4.0d+00 * u(m,i,ny - 3,k) + 6.0d+00 * u(m,i,ny - 2,k) - 4
&.0d+00 * u(m,i,ny - 1,k))
enddo
enddo
enddo
!DVM$ PARALLEL (k,i,m) ON rsd(m,i,ny + -1,k), PRIVATE (i,k,m)
do k = 2,nz - 1
do i = ist,iend
do m = 1,5
rsd(m,i,ny - 1,k) = rsd(m,i,ny - 1,k) - dssp * (u(m,i,ny
&- 3,k) - 4.0d+00 * u(m,i,ny - 2,k) + 5.0d+00 * u(m,i,ny - 1,k))
enddo
enddo
enddo
!DVM$ end region
deallocate(flux_br2)
allocate(utmp_br1(6,isiz3,2:isiz1 - 1,2:isiz2 - 1))
allocate(rtmp_br1(5,isiz3,2:isiz1 - 1,2:isiz2 - 1))
allocate(flux_br3(5,isiz1,2:isiz1 - 1,2:isiz2 - 1))
!---------------------------------------------------------------------
! zeta-direction flux differences
!---------------------------------------------------------------------
!DVM$ region
!DVM$ PARALLEL (j,i,k) ON utmp_br1(*,k,i,j), PRIVATE (j,i,k),
!DVM$& SHADOW_COMPUTE
do j = jst,jend
do i = ist,iend
do k = 1,nz
utmp_br1(1,k,i,j) = u(1,i,j,k)
utmp_br1(2,k,i,j) = u(2,i,j,k)
utmp_br1(3,k,i,j) = u(3,i,j,k)
utmp_br1(4,k,i,j) = u(4,i,j,k)
utmp_br1(5,k,i,j) = u(5,i,j,k)
utmp_br1(6,k,i,j) = rho_i(i,j,k)
enddo
enddo
enddo
!DVM$ PARALLEL (j,i,k) ON flux_br3(*,k,i,j), PRIVATE (j,i,k,q,u41),
!DVM$& SHADOW_COMPUTE
do j = jst,jend
do i = ist,iend
do k = 1,nz
flux_br3(1,k,i,j) = utmp_br1(4,k,i,j)
u41 = utmp_br1(4,k,i,j) * utmp_br1(6,k,i,j)
q = qs(i,j,k)
flux_br3(2,k,i,j) = utmp_br1(2,k,i,j) * u41
flux_br3(3,k,i,j) = utmp_br1(3,k,i,j) * u41
flux_br3(4,k,i,j) = utmp_br1(4,k,i,j) * u41 + c2 * (utmp_
&br1(5,k,i,j) - q)
flux_br3(5,k,i,j) = (c1 * utmp_br1(5,k,i,j) - c2 * q) * u
&41
enddo
enddo
enddo
!DVM$ PARALLEL (j,i,k,m) ON rtmp_br1(m,k,i,j), PRIVATE (j,i,k,m)
do j = jst,jend
do i = ist,iend
do k = 2,nz - 1
do m = 1,5
rtmp_br1(m,k,i,j) = rsd(m,i,j,k) - tz2 * (flux_br3(m,k
& + 1,i,j) - flux_br3(m,k - 1,i,j))
enddo
enddo
enddo
enddo
!DVM$ PARALLEL (j,i,k) ON flux_br3(*,k,i,j), PRIVATE (j,i,tmp,k,u21km1,u
!DVM$&41km1,u51k,u51km1,u31km1,u31k,u41k,u21k),SHADOW_COMPUTE
do j = jst,jend
do i = ist,iend
do k = 2,nz
tmp = utmp_br1(6,k,i,j)
u21k = tmp * utmp_br1(2,k,i,j)
u31k = tmp * utmp_br1(3,k,i,j)
u41k = tmp * utmp_br1(4,k,i,j)
u51k = tmp * utmp_br1(5,k,i,j)
tmp = utmp_br1(6,k - 1,i,j)
u21km1 = tmp * utmp_br1(2,k - 1,i,j)
u31km1 = tmp * utmp_br1(3,k - 1,i,j)
u41km1 = tmp * utmp_br1(4,k - 1,i,j)
u51km1 = tmp * utmp_br1(5,k - 1,i,j)
flux_br3(2,k,i,j) = tz3 * (u21k - u21km1)
flux_br3(3,k,i,j) = tz3 * (u31k - u31km1)
flux_br3(4,k,i,j) = 4.0d+00 / 3.0d+00 * tz3 * (u41k - u41
&km1)
flux_br3(5,k,i,j) = 0.50d+00 * (1.0d+00 - c1 * c5) * tz3
&* (u21k** 2 + u31k** 2 + u41k** 2 - (u21km1** 2 + u31km1** 2 + u41
&km1** 2)) + 1.0d+00 / 6.0d+00 * tz3 * (u41k** 2 - u41km1** 2) + c1
& * c5 * tz3 * (u51k - u51km1)
enddo
enddo
enddo
!DVM$ PARALLEL (j,i,k) ON rtmp_br1(*,k,i,j), PRIVATE (j,i,k)
do j = jst,jend
do i = ist,iend
do k = 2,nz - 1
rtmp_br1(1,k,i,j) = rtmp_br1(1,k,i,j) + dz1 * tz1 * (utmp
&_br1(1,k - 1,i,j) - 2.0d+00 * utmp_br1(1,k,i,j) + utmp_br1(1,k + 1
&,i,j))
rtmp_br1(2,k,i,j) = rtmp_br1(2,k,i,j) + tz3 * c3 * c4 * (
&flux_br3(2,k + 1,i,j) - flux_br3(2,k,i,j)) + dz2 * tz1 * (utmp_br1
&(2,k - 1,i,j) - 2.0d+00 * utmp_br1(2,k,i,j) + utmp_br1(2,k + 1,i,j
&))
rtmp_br1(3,k,i,j) = rtmp_br1(3,k,i,j) + tz3 * c3 * c4 * (
&flux_br3(3,k + 1,i,j) - flux_br3(3,k,i,j)) + dz3 * tz1 * (utmp_br1
&(3,k - 1,i,j) - 2.0d+00 * utmp_br1(3,k,i,j) + utmp_br1(3,k + 1,i,j
&))
rtmp_br1(4,k,i,j) = rtmp_br1(4,k,i,j) + tz3 * c3 * c4 * (
&flux_br3(4,k + 1,i,j) - flux_br3(4,k,i,j)) + dz4 * tz1 * (utmp_br1
&(4,k - 1,i,j) - 2.0d+00 * utmp_br1(4,k,i,j) + utmp_br1(4,k + 1,i,j
&))
rtmp_br1(5,k,i,j) = rtmp_br1(5,k,i,j) + tz3 * c3 * c4 * (
&flux_br3(5,k + 1,i,j) - flux_br3(5,k,i,j)) + dz5 * tz1 * (utmp_br1
&(5,k - 1,i,j) - 2.0d+00 * utmp_br1(5,k,i,j) + utmp_br1(5,k + 1,i,j
&))
enddo
enddo
enddo
!---------------------------------------------------------------------
! fourth-order dissipation
!---------------------------------------------------------------------
!DVM$ PARALLEL (j,i,m) ON rsd(m,i,j,2), PRIVATE (j,i,m)
do j = jst,jend
do i = ist,iend
do m = 1,5
rsd(m,i,j,2) = rtmp_br1(m,2,i,j) - dssp * ((+(5.0d+00)) *
& utmp_br1(m,2,i,j) - 4.0d+00 * utmp_br1(m,3,i,j) + utmp_br1(m,4,i,
&j))
enddo
enddo
enddo
!DVM$ PARALLEL (j,i,m) ON rsd(m,i,j,3), PRIVATE (j,i,m)
do j = jst,jend
do i = ist,iend
do m = 1,5
rsd(m,i,j,3) = rtmp_br1(m,3,i,j) - dssp * ((-(4.0d+00)) *
& utmp_br1(m,2,i,j) + 6.0d+00 * utmp_br1(m,3,i,j) - 4.0d+00 * utmp_
&br1(m,4,i,j) + utmp_br1(m,5,i,j))
enddo
enddo
enddo
!DVM$ PARALLEL (j,i,k,m) ON rsd(m,i,j,k), PRIVATE (j,i,k,m)
do j = jst,jend
do i = ist,iend
do k = 4,nz - 3
do m = 1,5
rsd(m,i,j,k) = rtmp_br1(m,k,i,j) - dssp * (utmp_br1(m,
&k - 2,i,j) - 4.0d+00 * utmp_br1(m,k - 1,i,j) + 6.0d+00 * utmp_br1(
&m,k,i,j) - 4.0d+00 * utmp_br1(m,k + 1,i,j) + utmp_br1(m,k + 2,i,j)
&)
enddo
enddo
enddo
enddo
!DVM$ PARALLEL (j,i,m) ON rsd(m,i,j,nz + -2), PRIVATE (j,i,m)
do j = jst,jend
do i = ist,iend
do m = 1,5
rsd(m,i,j,nz - 2) = rtmp_br1(m,nz - 2,i,j) - dssp * (utmp
&_br1(m,nz - 4,i,j) - 4.0d+00 * utmp_br1(m,nz - 3,i,j) + 6.0d+00 *
&utmp_br1(m,nz - 2,i,j) - 4.0d+00 * utmp_br1(m,nz - 1,i,j))
enddo
enddo
enddo
!DVM$ PARALLEL (j,i,m) ON rsd(m,i,j,nz + -1), PRIVATE (j,i,m)
do j = jst,jend
do i = ist,iend
do m = 1,5
rsd(m,i,j,nz - 1) = rtmp_br1(m,nz - 1,i,j) - dssp * (utmp
&_br1(m,nz - 3,i,j) - 4.0d+00 * utmp_br1(m,nz - 2,i,j) + 5.0d+00 *
&utmp_br1(m,nz - 1,i,j))
enddo
enddo
enddo
!DVM$ end region
deallocate(flux_br3)
deallocate(rtmp_br1)
deallocate(utmp_br1)
if (timeron) call timer_stop(t_rhs)
!DVM$ end interval
return
end

View File

@@ -0,0 +1,415 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine rhs
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c compute the right hand sides
c---------------------------------------------------------------------
implicit none
include 'applu.incl'
c---------------------------------------------------------------------
c local variables
c---------------------------------------------------------------------
integer i, j, k, m, p
double precision q
double precision tmp, utmp(6,isiz3), rtmp(5,isiz3)
double precision u21, u31, u41
double precision u21i, u31i, u41i, u51i
double precision u21j, u31j, u41j, u51j
double precision u21k, u31k, u41k, u51k
double precision u21im1, u31im1, u41im1, u51im1
double precision u21jm1, u31jm1, u41jm1, u51jm1
double precision u21km1, u31km1, u41km1, u51km1
double precision flu(5,-1:1)
if (timeron) call timer_start(t_rhs)
!DVM$ PARALLEL (k,j,i) ON qs(i,j,k), PRIVATE (j,i,tmp,k,m)
!DVM$&,SHADOW_COMPUTE,SHADOW_RENEW (u)
do k = 1, nz
do j = 1, ny
do i = 1, nx
do m = 1, 5
rsd(m,i,j,k) = - frct(m,i,j,k)
end do
tmp = 1.0d+00 / u(1,i,j,k)
rho_i(i,j,k) = tmp
qs(i,j,k) = 0.50d+00 * ( u(2,i,j,k) * u(2,i,j,k)
> + u(3,i,j,k) * u(3,i,j,k)
> + u(4,i,j,k) * u(4,i,j,k) )
> * tmp
end do
end do
end do
! if (timeron) call timer_start(t_rhsx)
!DVM$ PARALLEL (k,j,i) on rsd(*,i,j,k)
do k = 2, nz - 1
do j = jst, jend
do i = ist, iend
do p = -1, 1, 2
flu(1,p) = u(2,i+p,j,k)
u21 = u(2,i+p,j,k) * rho_i(i+p,j,k)
q = qs(i+p,j,k)
flu(2,p) = u(2,i+p,j,k) * u21 + c2 *
> ( u(5,i+p,j,k) - q )
flu(3,p) = u(3,i+p,j,k) * u21
flu(4,p) = u(4,i+p,j,k) * u21
flu(5,p) = ( c1 * u(5,i+p,j,k) - c2 * q ) * u21
end do
do m = 1, 5
rsd(m,i,j,k) = rsd(m,i,j,k)
> - tx2 * ( flu(m,1) - flu(m,-1) )
end do
do p = 0, 1
tmp = rho_i(i+p,j,k)
u21i = tmp * u(2,i+p,j,k)
u31i = tmp * u(3,i+p,j,k)
u41i = tmp * u(4,i+p,j,k)
u51i = tmp * u(5,i+p,j,k)
tmp = rho_i(i-1+p,j,k)
u21im1 = tmp * u(2,i-1+p,j,k)
u31im1 = tmp * u(3,i-1+p,j,k)
u41im1 = tmp * u(4,i-1+p,j,k)
u51im1 = tmp * u(5,i-1+p,j,k)
flu(2,p) = (4.0d+00/3.0d+00) * tx3 * (u21i-u21im1)
flu(3,p) = tx3 * ( u31i - u31im1 )
flu(4,p) = tx3 * ( u41i - u41im1 )
flu(5,p) = 0.50d+00 * ( 1.0d+00 - c1*c5)
> * tx3 * ( ( u21i **2 + u31i **2+u41i **2)
> - ( u21im1**2 + u31im1**2+u41im1**2))
> + (1.0d+00/6.0d+00)
> * tx3 * ( u21i**2 - u21im1**2 )
> + c1 * c5 * tx3 * ( u51i - u51im1 )
enddo
rsd(1,i,j,k) = rsd(1,i,j,k)
> + dx1 * tx1 * ( u(1,i-1,j,k)
> - 2.0d+00 * u(1,i,j,k)
> + u(1,i+1,j,k) )
rsd(2,i,j,k) = rsd(2,i,j,k)
> + tx3 * c3 * c4 * ( flu(2,1) - flu(2,0) )
> + dx2 * tx1 * ( u(2,i-1,j,k)
> - 2.0d+00 * u(2,i,j,k)
> + u(2,i+1,j,k) )
rsd(3,i,j,k) = rsd(3,i,j,k)
> + tx3 * c3 * c4 * ( flu(3,1) - flu(3,0) )
> + dx3 * tx1 * ( u(3,i-1,j,k)
> - 2.0d+00 * u(3,i,j,k)
> + u(3,i+1,j,k) )
rsd(4,i,j,k) = rsd(4,i,j,k)
> + tx3 * c3 * c4 * ( flu(4,1) - flu(4,0) )
> + dx4 * tx1 * ( u(4,i-1,j,k)
> - 2.0d+00 * u(4,i,j,k)
> + u(4,i+1,j,k) )
rsd(5,i,j,k) = rsd(5,i,j,k)
> + tx3 * c3 * c4 * ( flu(5,1) - flu(5,0) )
> + dx5 * tx1 * ( u(5,i-1,j,k)
> - 2.0d+00 * u(5,i,j,k)
> + u(5,i+1,j,k) )
if (i .eq. 2)then
do m = 1, 5
rsd(m,2,j,k) = rsd(m,2,j,k)
> - dssp * ( + 5.0d+00 * u(m,2,j,k)
> - 4.0d+00 * u(m,3,j,k)
> + u(m,4,j,k) )
enddo
else if (i .eq. 3)then
do m = 1, 5
rsd(m,3,j,k) = rsd(m,3,j,k)
> - dssp * ( - 4.0d+00 * u(m,2,j,k)
> + 6.0d+00 * u(m,3,j,k)
> - 4.0d+00 * u(m,4,j,k)
> + u(m,5,j,k) )
enddo
else if (i .eq. nx-2)then
do m = 1, 5
rsd(m,nx-2,j,k) = rsd(m,nx-2,j,k)
> - dssp * ( u(m,nx-4,j,k)
> - 4.0d+00 * u(m,nx-3,j,k)
> + 6.0d+00 * u(m,nx-2,j,k)
> - 4.0d+00 * u(m,nx-1,j,k) )
enddo
else if (i .eq. nx-1)then
do m = 1, 5
rsd(m,nx-1,j,k) = rsd(m,nx-1,j,k)
> - dssp * ( u(m,nx-3,j,k)
> - 4.0d+00 * u(m,nx-2,j,k)
> + 5.0d+00 * u(m,nx-1,j,k) )
enddo
else
do m = 1, 5
rsd(m,i,j,k) = rsd(m,i,j,k)
> - dssp * ( u(m,i-2,j,k)
> - 4.0d+00 * u(m,i-1,j,k)
> + 6.0d+00 * u(m,i,j,k)
> - 4.0d+00 * u(m,i+1,j,k)
> + u(m,i+2,j,k) )
end do
endif
! end do
! end do
! end do
! if (timeron) call timer_stop(t_rhsx)
! if (timeron) call timer_start(t_rhsy)
! do k = 2, nz - 1
! do j = jst, jend
! do i = ist, iend
do p = -1, 1, 2
flu(1,p) = u(3,i,j+p,k)
u31 = u(3,i,j+p,k) * rho_i(i,j+p,k)
q = qs(i,j+p,k)
flu(2,p) = u(2,i,j+p,k) * u31
flu(3,p) = u(3,i,j+p,k) * u31 + c2 * (u(5,i,j+p,k)-q)
flu(4,p) = u(4,i,j+p,k) * u31
flu(5,p) = ( c1 * u(5,i,j+p,k) - c2 * q ) * u31
end do
do m = 1, 5
rsd(m,i,j,k) = rsd(m,i,j,k)
> - ty2 * ( flu(m,1) - flu(m,-1) )
end do
do p = 0, 1
tmp = rho_i(i,j+p,k)
u21j = tmp * u(2,i,j+p,k)
u31j = tmp * u(3,i,j+p,k)
u41j = tmp * u(4,i,j+p,k)
u51j = tmp * u(5,i,j+p,k)
tmp = rho_i(i,j-1+p,k)
u21jm1 = tmp * u(2,i,j-1+p,k)
u31jm1 = tmp * u(3,i,j-1+p,k)
u41jm1 = tmp * u(4,i,j-1+p,k)
u51jm1 = tmp * u(5,i,j-1+p,k)
flu(2,p) = ty3 * ( u21j - u21jm1 )
flu(3,p) = (4.0d+00/3.0d+00) * ty3 * (u31j-u31jm1)
flu(4,p) = ty3 * ( u41j - u41jm1 )
flu(5,p) = 0.50d+00 * ( 1.0d+00 - c1*c5 )
> * ty3 * ( ( u21j **2 + u31j **2 + u41j **2 )
> - ( u21jm1**2 + u31jm1**2 + u41jm1**2 ) )
> + (1.0d+00/6.0d+00)
> * ty3 * ( u31j**2 - u31jm1**2 )
> + c1 * c5 * ty3 * ( u51j - u51jm1 )
enddo
rsd(1,i,j,k) = rsd(1,i,j,k)
> + dy1 * ty1 * ( u(1,i,j-1,k)
> - 2.0d+00 * u(1,i,j,k)
> + u(1,i,j+1,k) )
rsd(2,i,j,k) = rsd(2,i,j,k)
> + ty3 * c3 * c4 * ( flu(2,1) - flu(2,0) )
> + dy2 * ty1 * ( u(2,i,j-1,k)
> - 2.0d+00 * u(2,i,j,k)
> + u(2,i,j+1,k) )
rsd(3,i,j,k) = rsd(3,i,j,k)
> + ty3 * c3 * c4 * ( flu(3,1) - flu(3,0) )
> + dy3 * ty1 * ( u(3,i,j-1,k)
> - 2.0d+00 * u(3,i,j,k)
> + u(3,i,j+1,k) )
rsd(4,i,j,k) = rsd(4,i,j,k)
> + ty3 * c3 * c4 * ( flu(4,1) - flu(4,0) )
> + dy4 * ty1 * ( u(4,i,j-1,k)
> - 2.0d+00 * u(4,i,j,k)
> + u(4,i,j+1,k) )
rsd(5,i,j,k) = rsd(5,i,j,k)
> + ty3 * c3 * c4 * ( flu(5,1) - flu(5,0) )
> + dy5 * ty1 * ( u(5,i,j-1,k)
> - 2.0d+00 * u(5,i,j,k)
> + u(5,i,j+1,k) )
if (j .eq. 2) then
do m = 1, 5
rsd(m,i,2,k) = rsd(m,i,2,k)
> - dssp * ( + 5.0d+00 * u(m,i,2,k)
> - 4.0d+00 * u(m,i,3,k)
> + u(m,i,4,k) )
enddo
elseif (j .eq. 3) then
do m = 1, 5
rsd(m,i,3,k) = rsd(m,i,3,k)
> - dssp * ( - 4.0d+00 * u(m,i,2,k)
> + 6.0d+00 * u(m,i,3,k)
> - 4.0d+00 * u(m,i,4,k)
> + u(m,i,5,k) )
end do
elseif (j .eq. ny-2) then
do m = 1, 5
rsd(m,i,ny-2,k) = rsd(m,i,ny-2,k)
> - dssp * ( u(m,i,ny-4,k)
> - 4.0d+00 * u(m,i,ny-3,k)
> + 6.0d+00 * u(m,i,ny-2,k)
> - 4.0d+00 * u(m,i,ny-1,k) )
enddo
elseif (j .eq. ny-1) then
do m = 1, 5
rsd(m,i,ny-1,k) = rsd(m,i,ny-1,k)
> - dssp * ( u(m,i,ny-3,k)
> - 4.0d+00 * u(m,i,ny-2,k)
> + 5.0d+00 * u(m,i,ny-1,k) )
end do
else
do m = 1, 5
rsd(m,i,j,k) = rsd(m,i,j,k)
> - dssp * ( u(m,i,j-2,k)
> - 4.0d+00 * u(m,i,j-1,k)
> + 6.0d+00 * u(m,i,j,k)
> - 4.0d+00 * u(m,i,j+1,k)
> + u(m,i,j+2,k) )
end do
endif
! end do
! end do
! end do
! if (timeron) call timer_stop(t_rhsy)
! if (timeron) call timer_start(t_rhsz)
! do k = 2, nz - 1
! do j = jst, jend
! do i = ist, iend
do p=-1,1,2
flu(1,p) = u(4,i,j,k+p)
u41 = u(4,i,j,k+p) * rho_i(i,j,k+p)
q = qs(i,j,k+p)
flu(2,p) = u(2,i,j,k+p) * u41
flu(3,p) = u(3,i,j,k+p) * u41
flu(4,p) = u(4,i,j,k+p) * u41 + c2 * (u(5,i,j,k+p)-q)
flu(5,p) = ( c1 * u(5,i,j,k+p) - c2 * q ) * u41
enddo
do m = 1, 5
rsd(m,i,j,k) = rsd(m,i,j,k)
> - tz2 * ( flu(m,1) - flu(m,-1) )
end do
do p=0,1
tmp = rho_i(i,j,k+p)
u21k = tmp * u(2,i,j,k+p)
u31k = tmp * u(3,i,j,k+p)
u41k = tmp * u(4,i,j,k+p)
u51k = tmp * u(5,i,j,k+p)
tmp = rho_i(i,j,k-1+p)
u21km1 = tmp * u(2,i,j,k-1+p)
u31km1 = tmp * u(3,i,j,k-1+p)
u41km1 = tmp * u(4,i,j,k-1+p)
u51km1 = tmp * u(5,i,j,k-1+p)
flu(2,p) = tz3 * ( u21k - u21km1 )
flu(3,p) = tz3 * ( u31k - u31km1 )
flu(4,p) = (4.0d+00/3.0d+00) * tz3 * (u41k-u41km1)
flu(5,p) = 0.50d+00 * ( 1.0d+00 - c1*c5 )
> * tz3 * ( ( u21k **2 + u31k **2 + u41k **2 )
> - ( u21km1**2 + u31km1**2 + u41km1**2 ) )
> + (1.0d+00/6.0d+00)
> * tz3 * ( u41k**2 - u41km1**2 )
> + c1 * c5 * tz3 * ( u51k - u51km1 )
enddo
rsd(1,i,j,k) = rsd(1,i,j,k)
> + dz1 * tz1 * ( u(1,i,j,k-1)
> - 2.0d+00 * u(1,i,j,k)
> + u(1,i,j,k+1) )
rsd(2,i,j,k) = rsd(2,i,j,k)
> + tz3 * c3 * c4 * ( flu(2,1) - flu(2,0) )
> + dz2 * tz1 * ( u(2,i,j,k-1)
> - 2.0d+00 * u(2,i,j,k)
> + u(2,i,j,k+1) )
rsd(3,i,j,k) = rsd(3,i,j,k)
> + tz3 * c3 * c4 * ( flu(3,1) - flu(3,0) )
> + dz3 * tz1 * ( u(3,i,j,k-1)
> - 2.0d+00 * u(3,i,j,k)
> + u(3,i,j,k+1) )
rsd(4,i,j,k) = rsd(4,i,j,k)
> + tz3 * c3 * c4 * ( flu(4,1) - flu(4,0) )
> + dz4 * tz1 * ( u(4,i,j,k-1)
> - 2.0d+00 * u(4,i,j,k)
> + u(4,i,j,k+1) )
rsd(5,i,j,k) = rsd(5,i,j,k)
> + tz3 * c3 * c4 * ( flu(5,1) - flu(5,0) )
> + dz5 * tz1 * ( u(5,i,j,k-1)
> - 2.0d+00 * u(5,i,j,k)
> + u(5,i,j,k+1) )
if (k .eq. 2) then
do m = 1, 5
rsd(m,i,j,2) = rsd(m,i,j,2)
> - dssp * ( + 5.0d+00 * u(m,i,j,2)
> - 4.0d+00 * u(m,i,j,3)
> + u(m,i,j,4) )
end do
elseif (k .eq. 3) then
do m = 1, 5
rsd(m,i,j,3) = rsd(m,i,j,3)
> - dssp * ( - 4.0d+00 * u(m,i,j,2)
> + 6.0d+00 * u(m,i,j,3)
> - 4.0d+00 * u(m,i,j,4)
> + u(m,i,j,5) )
end do
elseif (k .eq. nz-2) then
do m = 1, 5
rsd(m,i,j,nz-2) = rsd(m,i,j,nz-2)
> - dssp * ( u(m,i,j,nz-4)
> - 4.0d+00 * u(m,i,j,nz-3)
> + 6.0d+00 * u(m,i,j,nz-2)
> - 4.0d+00 * u(m,i,j,nz-1) )
end do
elseif (k .eq. nz-1) then
do m = 1, 5
rsd(m,i,j,nz-1) = rsd(m,i,j,nz-1)
> - dssp * ( u(m,i,j,nz-3)
> - 4.0d+00 * u(m,i,j,nz-2)
> + 5.0d+00 * u(m,i,j,nz-1) )
end do
else
do m = 1, 5
rsd(m,i,j,k) = rsd(m,i,j,k)
> - dssp * ( u(m,i,j,k-2)
> - 4.0d+00 * u(m,i,j,k-1)
> + 6.0d+00 * u(m,i,j,k)
> - 4.0d+00 * u(m,i,j,k+1)
> + u(m,i,j,k+2) )
end do
endif
end do
end do
end do
! if (timeron) call timer_stop(t_rhsz)
if (timeron) call timer_stop(t_rhs)
return
end

View File

@@ -0,0 +1,104 @@
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine setbv ()
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! set the boundary values of dependent variables
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! local variables
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! end of include file
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
implicit none
!---------------------------------------------------------------------
! end of include file
include 'applu.incl'
!---------------------------------------------------------------------
integer i,j,k,m
double precision temp1(5),temp2(5)
!---------------------------------------------------------------------
! set the dependent variable values along the top and bottom faces
!---------------------------------------------------------------------
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r1(iEX0,iEX1,iEX2,iE
!DVM$&X3) :: u
!DVM$ PARALLEL (j,i) ON u(*,i,j,*), PRIVATE (m,i,j,temp2,temp1)
do j = 1,ny
do i = 1,nx
call exact(i,j,1,temp1)
call exact(i,j,nz,temp2)
do m = 1,5
u(m,i,j,1) = temp1(m)
u(m,i,j,nz) = temp2(m)
enddo
enddo
enddo
!---------------------------------------------------------------------
! set the dependent variable values along north and south faces
!---------------------------------------------------------------------
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
!DVM$& :: u
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r2(iEX0,iEX1,iEX2,iE
!DVM$&X3) :: u
!DVM$ PARALLEL (k,i) ON u(*,i,*,k), PRIVATE (m,i,temp2,k,temp1)
do k = 1,nz
do i = 1,nx
call exact(i,1,k,temp1)
call exact(i,ny,k,temp2)
do m = 1,5
u(m,i,1,k) = temp1(m)
u(m,i,ny,k) = temp2(m)
enddo
enddo
enddo
!---------------------------------------------------------------------
! set the dependent variable values along east and west faces
!---------------------------------------------------------------------
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
!DVM$& :: u
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r3(iEX0,iEX1,iEX2,iE
!DVM$&X3) :: u
!DVM$ PARALLEL (k,j) ON u(*,*,j,k), PRIVATE (m,j,temp2,k,temp1)
do k = 1,nz
do j = 1,ny
call exact(1,j,k,temp1)
call exact(nx,j,k,temp2)
do m = 1,5
u(m,1,j,k) = temp1(m)
u(m,nx,j,k) = temp2(m)
enddo
enddo
enddo
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
!DVM$& :: u
return
end

View File

@@ -0,0 +1,166 @@
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine setcoeff ()
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! local variables
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! set up coefficients
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! end of include file
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
implicit none
!---------------------------------------------------------------------
! end of include file
include 'applu.incl'
!---------------------------------------------------------------------
dxi = 1.0d+00 / (nx0 - 1)
deta = 1.0d+00 / (ny0 - 1)
dzeta = 1.0d+00 / (nz0 - 1)
tx1 = 1.0d+00 / (dxi * dxi)
tx2 = 1.0d+00 / (2.0d+00 * dxi)
tx3 = 1.0d+00 / dxi
ty1 = 1.0d+00 / (deta * deta)
ty2 = 1.0d+00 / (2.0d+00 * deta)
ty3 = 1.0d+00 / deta
tz1 = 1.0d+00 / (dzeta * dzeta)
tz2 = 1.0d+00 / (2.0d+00 * dzeta)
tz3 = 1.0d+00 / dzeta
!---------------------------------------------------------------------
! diffusion coefficients
!---------------------------------------------------------------------
dx1 = 0.75d+00
dx2 = dx1
dx3 = dx1
dx4 = dx1
dx5 = dx1
dy1 = 0.75d+00
dy2 = dy1
dy3 = dy1
dy4 = dy1
dy5 = dy1
dz1 = 1.00d+00
dz2 = dz1
dz3 = dz1
dz4 = dz1
dz5 = dz1
!---------------------------------------------------------------------
! fourth difference dissipation
!---------------------------------------------------------------------
dssp = max (dx1,dy1,dz1) / 4.0d+00
!---------------------------------------------------------------------
! coefficients of the exact solution to the first pde
!---------------------------------------------------------------------
ce(1,1) = 2.0d+00
ce(1,2) = 0.0d+00
ce(1,3) = 0.0d+00
ce(1,4) = 4.0d+00
ce(1,5) = 5.0d+00
ce(1,6) = 3.0d+00
ce(1,7) = 5.0d-01
ce(1,8) = 2.0d-02
ce(1,9) = 1.0d-02
ce(1,10) = 3.0d-02
ce(1,11) = 5.0d-01
ce(1,12) = 4.0d-01
ce(1,13) = 3.0d-01
!---------------------------------------------------------------------
! coefficients of the exact solution to the second pde
!---------------------------------------------------------------------
ce(2,1) = 1.0d+00
ce(2,2) = 0.0d+00
ce(2,3) = 0.0d+00
ce(2,4) = 0.0d+00
ce(2,5) = 1.0d+00
ce(2,6) = 2.0d+00
ce(2,7) = 3.0d+00
ce(2,8) = 1.0d-02
ce(2,9) = 3.0d-02
ce(2,10) = 2.0d-02
ce(2,11) = 4.0d-01
ce(2,12) = 3.0d-01
ce(2,13) = 5.0d-01
!---------------------------------------------------------------------
! coefficients of the exact solution to the third pde
!---------------------------------------------------------------------
ce(3,1) = 2.0d+00
ce(3,2) = 2.0d+00
ce(3,3) = 0.0d+00
ce(3,4) = 0.0d+00
ce(3,5) = 0.0d+00
ce(3,6) = 2.0d+00
ce(3,7) = 3.0d+00
ce(3,8) = 4.0d-02
ce(3,9) = 3.0d-02
ce(3,10) = 5.0d-02
ce(3,11) = 3.0d-01
ce(3,12) = 5.0d-01
ce(3,13) = 4.0d-01
!---------------------------------------------------------------------
! coefficients of the exact solution to the fourth pde
!---------------------------------------------------------------------
ce(4,1) = 2.0d+00
ce(4,2) = 2.0d+00
ce(4,3) = 0.0d+00
ce(4,4) = 0.0d+00
ce(4,5) = 0.0d+00
ce(4,6) = 2.0d+00
ce(4,7) = 3.0d+00
ce(4,8) = 3.0d-02
ce(4,9) = 5.0d-02
ce(4,10) = 4.0d-02
ce(4,11) = 2.0d-01
ce(4,12) = 1.0d-01
ce(4,13) = 3.0d-01
!---------------------------------------------------------------------
! coefficients of the exact solution to the fifth pde
!---------------------------------------------------------------------
ce(5,1) = 5.0d+00
ce(5,2) = 4.0d+00
ce(5,3) = 3.0d+00
ce(5,4) = 2.0d+00
ce(5,5) = 1.0d-01
ce(5,6) = 4.0d-01
ce(5,7) = 3.0d-01
ce(5,8) = 5.0d-02
ce(5,9) = 4.0d-02
ce(5,10) = 3.0d-02
ce(5,11) = 1.0d-01
ce(5,12) = 3.0d-01
ce(5,13) = 2.0d-01
return
end

View File

@@ -0,0 +1,82 @@
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine setiv ()
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!
! set the initial values of independent variables based on tri-linear
! interpolation of boundary values in the computational space.
!
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! local variables
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! end of include file
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
implicit none
!---------------------------------------------------------------------
! end of include file
include 'applu.incl'
!---------------------------------------------------------------------
integer i,j,k,m
double precision xi,eta,zeta
double precision pxi,peta,pzeta
double precision ue_1jk(5),ue_nx0jk(5),ue_i1k(5),ue_iny0k(5),ue_i
&j1(5),ue_ijnz(5)
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0_r0(iEX0,iEX1,iEX2,iE
!DVM$&X3) :: u
!DVM$ PARALLEL (k) ON u(*,*,*,k), PRIVATE (xi,m,peta,pxi,pzeta,i,j,eta,u
!DVM$&e_ij1,zeta,ue_i1k,ue_iny0k,k,ue_1jk,ue_nx0jk,ue_ijnz)
do k = 2,nz - 1
zeta = dble (k - 1) / (nz - 1)
do j = 2,ny - 1
eta = dble (j - 1) / (ny0 - 1)
do i = 2,nx - 1
xi = dble (i - 1) / (nx0 - 1)
call exact(1,j,k,ue_1jk)
call exact(nx0,j,k,ue_nx0jk)
call exact(i,1,k,ue_i1k)
call exact(i,ny0,k,ue_iny0k)
call exact(i,j,1,ue_ij1)
call exact(i,j,nz,ue_ijnz)
do m = 1,5
pxi = (1.0d+00 - xi) * ue_1jk(m) + xi * ue_nx0jk(m)
peta = (1.0d+00 - eta) * ue_i1k(m) + eta * ue_iny0k(m)
pzeta = (1.0d+00 - zeta) * ue_ij1(m) + zeta * ue_ijnz(
&m)
u(m,i,j,k) = pxi + peta + pzeta - pxi * peta - peta *
&pzeta - pzeta * pxi + pxi * peta * pzeta
enddo
enddo
enddo
enddo
!DVM$ REALIGN (iEX0,iEX1,iEX2,iEX3) WITH dvmh_temp0(iEX0,iEX1,iEX2,iEX3)
!DVM$& :: u
return
end

View File

@@ -0,0 +1,765 @@
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine ssor (niter)
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! to perform pseudo-time stepping SSOR iterations
! for five nonlinear pde's.
!---------------------------------------------------------------------
implicit none
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! end of include file
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! end of include file
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! end of include file
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
integer :: niter
!---------------------------------------------------------------------
! end of include file
INCLUDE 'applu.incl'
!---------------------------------------------------------------------
!---------------------------------------------------------------------
integer :: i,j,k,m,n
integer :: istep
double precision :: tmp,tv(5),d_(5,5),a_(5,5),b_(5,5),c_(5,5)
double precision :: delunm(5),rs(5)
external timer_read
double precision :: timer_read
integer :: mod_522_2
integer :: mod_522_1
integer :: mod_522_0
double precision :: tmp3
double precision :: tmp2
double precision :: tmp1
double precision :: c34
double precision :: c1345
double precision :: r43
integer :: j__3
integer :: i__4
integer :: d_15_14
integer :: d_15_13
integer :: ldx_14_12
integer :: ldx_14_11
integer :: ldy_13_10
integer :: ldy_13_9
integer :: ldz_12_8
integer :: ldz_12_7
integer :: v_11_6
integer :: v_11_5
double precision :: tv__15(5)
double precision :: tmat(5,5)
double precision :: tmp1__16
double precision :: tmp__17
integer :: m__18
integer :: j__19
integer :: i__20
double precision :: tmp3__21
double precision :: tmp2__22
double precision :: tmp1__23
double precision :: c34__24
double precision :: c1345__25
double precision :: r43__26
integer :: j__27
integer :: i__28
integer :: udz_43_38
integer :: udz_43_37
integer :: udy_42_36
integer :: udy_42_35
integer :: udx_41_34
integer :: udx_41_33
integer :: d_40_32
integer :: d_40_31
integer :: v_38_30
integer :: v_38_29
double precision :: tmat__39(5,5)
double precision :: tmp1__40
double precision :: tmp__41
integer :: m__42
integer :: j__43
integer :: i__44
!---------------------------------------------------------------------
! begin pseudo-time stepping iterations
!---------------------------------------------------------------------
tmp = 1.0d+00 / (omega * (2.0d+00 - omega))
do i = 1,11
call timer_clear(i)
enddo
!---------------------------------------------------------------------
! compute the steady-state residuals
!---------------------------------------------------------------------
call rhs()
!---------------------------------------------------------------------
! compute the L2 norms of newton iteration residuals
!---------------------------------------------------------------------
call l2norm(isiz1,isiz2,isiz3,nx0,ny0,nz0,ist,iend,jst,jend,rsd,rs
&dnm)
! if ( ipr .eq. 1 ) then
! write (*,*) ' Initial residual norms'
! write (*,*)
! write (*,1007) ( rsdnm(m), m = 1, 5 )
! write (*,'(/a)') 'Iteration RMS-residual of 5th PDE'
! end if
do i = 1,11
call timer_clear(i)
enddo
call timer_start(1)
!---------------------------------------------------------------------
! the timestep loop
!---------------------------------------------------------------------
do istep = 1,niter
mod_522_0 = mod (istep,20)
! if ( ( mod ( istep, inorm ) .eq. 0 ) .and.
! > ipr .eq. 1 ) then
! write ( *, 1001 ) istep
! end if
if (mod_522_0 .eq. 0 .or. istep .eq. itmax .or. istep .eq. 1) t
&hen
if (niter .gt. 1) write (unit = *,fmt = 200) istep
200 FORMAT(' Time step ', I4)
endif
!---------------------------------------------------------------------
! perform SSOR iteration
!---------------------------------------------------------------------
if (timeron) then
call timer_start(5)
endif
!DVM$ INTERVAL 22
if (timeron) then
call timer_stop(5)
endif
r43 = 4.0d+00 / 3.0d+00
c1345 = c1 * c3 * c4 * c5
c34 = c3 * c4
r43__26 = 4.0d+00 / 3.0d+00
c1345__25 = c1 * c3 * c4 * c5
c34__24 = c3 * c4
!DVM$ REGION
!DVM$ PARALLEL (k,j,i) ON rsd(*,i,j,k), PRIVATE (tmp3,tmp1,tmp2,tmat,
!DVM$&k,tv,rs,rmk,rmj,rmi,ro),ACROSS (rsd(0:0,1:0,1:0,1:0)),CUDA_BLOCK (
!DVM$&16,16)
do k = 2,nz - 1
do j = jst,jend
do i = ist,iend
rmk = 1.0d+00 / u(1,i,j,k - 1)
rmj = 1.0d+00 / u(1,i,j - 1,k)
rmi = 1.0d+00 / u(1,i - 1,j,k)
ro = 1.0d+00 / u(1,i,j,k)
rs(1) = dt * rsd(1,i,j,k)
rs(2) = dt * rsd(2,i,j,k)
rs(3) = dt * rsd(3,i,j,k)
rs(4) = dt * rsd(4,i,j,k)
rs(5) = dt * rsd(5,i,j,k)
rs(1) = rs(1) - omega * ((-(dt)) * tz1 * dz1 * rsd(1,i
&,j,k - 1) + (-(dt)) * tz2 * rsd(4,i,j,k - 1))
tv(1) = rs(1) - omega * ((-(dt)) * ty1 * dy1 * rsd(1,i
&,j - 1,k) + (-(dt)) * tx1 * dx1 * rsd(1,i - 1,j,k) + (-(dt)) * tx2
& * rsd(2,i - 1,j,k) + (-(dt)) * ty2 * rsd(3,i,j - 1,k) + 0.0d+00 *
& rsd(3,i - 1,j,k) + 0.0d+00 * rsd(4,i - 1,j,k) + 0.0d+00 * rsd(5,i
& - 1,j,k))
tmat(1,1) = 1.0d+00 + dt * 2.0d+00 * (tx1 * dx1 + ty1
&* dy1 + tz1 * dz1)
tmat(1,2) = 0
tmat(1,3) = 0
tmat(1,4) = 0
tmat(1,5) = 0
rs(2) = rs(2) - omega * (((-(dt)) * tz2 * ((-(u(2,i,j,
&k - 1) * u(4,i,j,k - 1))) * rmk * rmk) - dt * tz1 * ((-(c34)) * rm
&k * rmk * u(2,i,j,k - 1))) * rsd(1,i,j,k - 1) + ((-(dt)) * tz2 * (
&u(4,i,j,k - 1) * rmk) - dt * tz1 * c34 * rmk - dt * tz1 * dz2) * r
&sd(2,i,j,k - 1) + (-(dt)) * tz2 * (u(2,i,j,k - 1) * rmk) * rsd(4,i
&,j,k - 1))
tv(2) = rs(2) - omega * (((-(dt)) * ty2 * ((-(u(2,i,j
&- 1,k) * u(3,i,j - 1,k))) * rmj * rmj) - dt * ty1 * ((-(c34)) * rm
&j * rmj * u(2,i,j - 1,k))) * rsd(1,i,j - 1,k) + ((-(dt)) * tx2 * (
&(-((u(2,i - 1,j,k) * rmi)** 2)) + c2 * qs(i - 1,j,k) * rmi) - dt *
& tx1 * ((-(r43)) * c34 * rmi * rmi * u(2,i - 1,j,k))) * rsd(1,i -
&1,j,k) + ((-(dt)) * ty2 * (u(3,i,j - 1,k) * rmj) - dt * ty1 * (c34
& * rmj) - dt * ty1 * dy2) * rsd(2,i,j - 1,k) + ((-(dt)) * tx2 * ((
&2.0d+00 - c2) * (u(2,i - 1,j,k) * rmi)) - dt * tx1 * (r43 * c34 *
&rmi) - dt * tx1 * dx2) * rsd(2,i - 1,j,k) + (-(dt)) * ty2 * (u(2,i
&,j - 1,k) * rmj) * rsd(3,i,j - 1,k) + (-(dt)) * tx2 * ((-(c2)) * (
&u(3,i - 1,j,k) * rmi)) * rsd(3,i - 1,j,k) + 0.0d+00 * rsd(4,i,j -
&1,k) + (-(dt)) * tx2 * ((-(c2)) * (u(4,i - 1,j,k) * rmi)) * rsd(4,
&i - 1,j,k) + 0.0d+00 * rsd(5,i,j - 1,k) + (-(dt)) * tx2 * c2 * rsd
&(5,i - 1,j,k))
tmat(2,1) = (-(dt)) * 2.0d+00 * (tx1 * r43 + ty1 + tz1
&) * c34 * ro * ro * u(2,i,j,k)
tmat(2,2) = 1.0d+00 + dt * 2.0d+00 * c34 * ro * (tx1 *
& r43 + ty1 + tz1) + dt * 2.0d+00 * (tx1 * dx2 + ty1 * dy2 + tz1 *
&dz2)
tmat(2,3) = 0
tmat(2,4) = 0
tmat(2,5) = 0
rs(3) = rs(3) - omega * (((-(dt)) * tz2 * ((-(u(3,i,j,
&k - 1) * u(4,i,j,k - 1))) * rmk * rmk) - dt * tz1 * ((-(c34)) * rm
&k * rmk * u(3,i,j,k - 1))) * rsd(1,i,j,k - 1) + ((-(dt)) * tz2 * (
&u(4,i,j,k - 1) * rmk) - dt * tz1 * (c34 * rmk) - dt * tz1 * dz3) *
& rsd(3,i,j,k - 1) + (-(dt)) * tz2 * (u(3,i,j,k - 1) * rmk) * rsd(4
&,i,j,k - 1))
tv(3) = rs(3) - omega * (((-(dt)) * ty2 * ((-((u(3,i,j
& - 1,k) * rmj)** 2)) + c2 * (qs(i,j - 1,k) * rmj)) - dt * ty1 * ((
&-(r43)) * c34 * rmj * rmj * u(3,i,j - 1,k))) * rsd(1,i,j - 1,k) +
&((-(dt)) * tx2 * ((-(u(2,i - 1,j,k) * u(3,i - 1,j,k))) * rmi * rmi
&) - dt * tx1 * ((-(c34)) * rmi * rmi * u(3,i - 1,j,k))) * rsd(1,i
&- 1,j,k) + (-(dt)) * ty2 * ((-(c2)) * (u(2,i,j - 1,k) * rmj)) * rs
&d(2,i,j - 1,k) + (-(dt)) * tx2 * (u(3,i - 1,j,k) * rmi) * rsd(2,i
&- 1,j,k) + ((-(dt)) * ty2 * ((2.0d+00 - c2) * (u(3,i,j - 1,k) * rm
&j)) - dt * ty1 * (r43 * c34 * rmj) - dt * ty1 * dy3) * rsd(3,i,j -
& 1,k) + ((-(dt)) * tx2 * (u(2,i - 1,j,k) * rmi) - dt * tx1 * (c34
&* rmi) - dt * tx1 * dx3) * rsd(3,i - 1,j,k) + (-(dt)) * ty2 * ((-(
&c2)) * (u(4,i,j - 1,k) * rmj)) * rsd(4,i,j - 1,k) + 0.0d+00 * rsd(
&4,i - 1,j,k) + (-(dt)) * ty2 * c2 * rsd(5,i,j - 1,k) + 0.0d+00 * r
&sd(5,i - 1,j,k))
tmat(3,1) = (-(dt)) * 2.0d+00 * (tx1 + ty1 * r43 + tz1
&) * c34 * ro * ro * u(3,i,j,k)
tmat(3,2) = 0
tmat(3,3) = 1.0d+00 + dt * 2.0d+00 * c34 * ro * (tx1 +
& ty1 * r43 + tz1) + dt * 2.0d+00 * (tx1 * dx3 + ty1 * dy3 + tz1 *
&dz3)
tmat(3,4) = 0
tmat(3,5) = 0
rs(4) = rs(4) - omega * (((-(dt)) * tz2 * ((-((u(4,i,j
&,k - 1) * rmk)** 2)) + c2 * qs(i,j,k - 1) * rmk) - dt * tz1 * ((-(
&r43)) * c34 * rmk * rmk * u(4,i,j,k - 1))) * rsd(1,i,j,k - 1) + (-
&(dt)) * tz2 * ((-(c2)) * (u(2,i,j,k - 1) * rmk)) * rsd(2,i,j,k - 1
&) + (-(dt)) * tz2 * ((-(c2)) * (u(3,i,j,k - 1) * rmk)) * rsd(3,i,j
&,k - 1) + ((-(dt)) * tz2 * (2.0d+00 - c2) * (u(4,i,j,k - 1) * rmk)
& - dt * tz1 * (r43 * c34 * rmk) - dt * tz1 * dz4) * rsd(4,i,j,k -
&1) + (-(dt)) * tz2 * c2 * rsd(5,i,j,k - 1))
tv(4) = rs(4) - omega * (((-(dt)) * ty2 * ((-(u(3,i,j
&- 1,k) * u(4,i,j - 1,k))) * rmj * rmj) - dt * ty1 * ((-(c34)) * rm
&j * rmj * u(4,i,j - 1,k))) * rsd(1,i,j - 1,k) + ((-(dt)) * tx2 * (
&(-(u(2,i - 1,j,k) * u(4,i - 1,j,k))) * rmi * rmi) - dt * tx1 * ((-
&(c34)) * rmi * rmi * u(4,i - 1,j,k))) * rsd(1,i - 1,j,k) + 0.0d+00
& * rsd(2,i,j - 1,k) + (-(dt)) * tx2 * (u(4,i - 1,j,k) * rmi) * rsd
&(2,i - 1,j,k) + (-(dt)) * ty2 * (u(4,i,j - 1,k) * rmj) * rsd(3,i,j
& - 1,k) + 0.0d+00 * rsd(3,i - 1,j,k) + ((-(dt)) * ty2 * (u(3,i,j -
& 1,k) * rmj) - dt * ty1 * (c34 * rmj) - dt * ty1 * dy4) * rsd(4,i,
&j - 1,k) + ((-(dt)) * tx2 * (u(2,i - 1,j,k) * rmi) - dt * tx1 * (c
&34 * rmi) - dt * tx1 * dx4) * rsd(4,i - 1,j,k) + 0.0d+00 * rsd(5,i
&,j - 1,k) + 0.0d+00 * rsd(5,i - 1,j,k))
tmat(4,1) = (-(dt)) * 2.0d+00 * (tx1 + ty1 + tz1 * r43
&) * c34 * ro * ro * u(4,i,j,k)
tmat(4,2) = 0
tmat(4,3) = 0
tmat(4,4) = 1.0d+00 + dt * 2.0d+00 * c34 * ro * (tx1 +
& ty1 + tz1 * r43) + dt * 2.0d+00 * (tx1 * dx4 + ty1 * dy4 + tz1 *
&dz4)
tmat(4,5) = 0
rs(5) = rs(5) - omega * (((-(dt)) * tz2 * ((c2 * 2.0d0
& * qs(i,j,k - 1) - c1 * u(5,i,j,k - 1)) * u(4,i,j,k - 1) * rmk * r
&mk) - dt * tz1 * ((-(c34 - c1345)) * rmk * rmk * rmk * u(2,i,j,k -
& 1)** 2 - (c34 - c1345) * rmk * rmk * rmk * u(3,i,j,k - 1)** 2 - (
&r43 * c34 - c1345) * rmk * rmk * rmk * u(4,i,j,k - 1)** 2 - c1345
&* rmk * rmk * u(5,i,j,k - 1))) * rsd(1,i,j,k - 1) + ((-(dt)) * tz2
& * ((-(c2)) * (u(2,i,j,k - 1) * u(4,i,j,k - 1)) * rmk * rmk) - dt
&* tz1 * (c34 - c1345) * rmk * rmk * u(2,i,j,k - 1)) * rsd(2,i,j,k
&- 1) + ((-(dt)) * tz2 * ((-(c2)) * (u(3,i,j,k - 1) * u(4,i,j,k - 1
&)) * rmk * rmk) - dt * tz1 * (c34 - c1345) * rmk * rmk * u(3,i,j,k
& - 1)) * rsd(3,i,j,k - 1) + ((-(dt)) * tz2 * (c1 * (u(5,i,j,k - 1)
& * rmk) - c2 * (qs(i,j,k - 1) * rmk + u(4,i,j,k - 1) * u(4,i,j,k -
& 1) * rmk * rmk)) - dt * tz1 * (r43 * c34 - c1345) * rmk * rmk * u
&(4,i,j,k - 1)) * rsd(4,i,j,k - 1) + ((-(dt)) * tz2 * (c1 * (u(4,i,
&j,k - 1) * rmk)) - dt * tz1 * c1345 * rmk - dt * tz1 * dz5) * rsd(
&5,i,j,k - 1))
tv(5) = rs(5) - omega * (((-(dt)) * ty2 * ((c2 * 2.0d0
& * qs(i,j - 1,k) - c1 * u(5,i,j - 1,k)) * (u(3,i,j - 1,k) * rmj *
&rmj)) - dt * ty1 * ((-(c34 - c1345)) * rmj * rmj * rmj * u(2,i,j -
& 1,k)** 2 - (r43 * c34 - c1345) * rmj * rmj * rmj * u(3,i,j - 1,k)
&** 2 - (c34 - c1345) * rmj * rmj * rmj * u(4,i,j - 1,k)** 2 - c134
&5 * rmj * rmj * u(5,i,j - 1,k))) * rsd(1,i,j - 1,k) + ((-(dt)) * t
&x2 * ((c2 * 2.0d0 * qs(i - 1,j,k) - c1 * u(5,i - 1,j,k)) * u(2,i -
& 1,j,k) * rmi * rmi) - dt * tx1 * ((-(r43 * c34 - c1345)) * rmi *
&rmi * rmi * u(2,i - 1,j,k)** 2 - (c34 - c1345) * rmi * rmi * rmi *
& u(3,i - 1,j,k)** 2 - (c34 - c1345) * rmi * rmi * rmi * u(4,i - 1,
&j,k)** 2 - c1345 * rmi * rmi * u(5,i - 1,j,k))) * rsd(1,i - 1,j,k)
& + ((-(dt)) * ty2 * ((-(c2)) * (u(2,i,j - 1,k) * u(3,i,j - 1,k)) *
& rmj * rmj) - dt * ty1 * (c34 - c1345) * rmj * rmj * u(2,i,j - 1,k
&)) * rsd(2,i,j - 1,k) + ((-(dt)) * tx2 * (c1 * (u(5,i - 1,j,k) * r
&mi) - c2 * (u(2,i - 1,j,k) * u(2,i - 1,j,k) * rmi * rmi + qs(i - 1
&,j,k) * rmi)) - dt * tx1 * (r43 * c34 - c1345) * rmi * rmi * u(2,i
& - 1,j,k)) * rsd(2,i - 1,j,k) + ((-(dt)) * ty2 * (c1 * (u(5,i,j -
&1,k) * rmj) - c2 * (qs(i,j - 1,k) * rmj + u(3,i,j - 1,k) * u(3,i,j
& - 1,k) * rmj * rmj)) - dt * ty1 * (r43 * c34 - c1345) * rmj * rmj
& * u(3,i,j - 1,k)) * rsd(3,i,j - 1,k) + ((-(dt)) * tx2 * ((-(c2))
&* (u(3,i - 1,j,k) * u(2,i - 1,j,k)) * rmi * rmi) - dt * tx1 * (c34
& - c1345) * rmi * rmi * u(3,i - 1,j,k)) * rsd(3,i - 1,j,k) + ((-(d
&t)) * ty2 * ((-(c2)) * (u(3,i,j - 1,k) * u(4,i,j - 1,k)) * rmj * r
&mj) - dt * ty1 * (c34 - c1345) * rmj * rmj * u(4,i,j - 1,k)) * rsd
&(4,i,j - 1,k) + ((-(dt)) * tx2 * ((-(c2)) * (u(4,i - 1,j,k) * u(2,
&i - 1,j,k)) * rmi * rmi) - dt * tx1 * (c34 - c1345) * rmi * rmi *
&u(4,i - 1,j,k)) * rsd(4,i - 1,j,k) + ((-(dt)) * ty2 * (c1 * (u(3,i
&,j - 1,k) * rmj)) - dt * ty1 * c1345 * rmj - dt * ty1 * dy5) * rsd
&(5,i,j - 1,k) + ((-(dt)) * tx2 * (c1 * (u(2,i - 1,j,k) * rmi)) - d
&t * tx1 * c1345 * rmi - dt * tx1 * dx5) * rsd(5,i - 1,j,k))
tmat(5,1) = (-(dt)) * 2.0d+00 * (((tx1 * (r43 * c34 -
&c1345) + ty1 * (c34 - c1345) + tz1 * (c34 - c1345)) * u(2,i,j,k)**
& 2 + (tx1 * (c34 - c1345) + ty1 * (r43 * c34 - c1345) + tz1 * (c34
& - c1345)) * u(3,i,j,k)** 2 + (tx1 * (c34 - c1345) + ty1 * (c34 -
&c1345) + tz1 * (r43 * c34 - c1345)) * u(4,i,j,k)** 2) * ro * ro *
&ro + (tx1 + ty1 + tz1) * c1345 * ro * ro * u(5,i,j,k))
tmat(5,2) = dt * 2.0d+00 * ro * ro * u(2,i,j,k) * (tx1
& * (r43 * c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (c34 - c1345)
&)
tmat(5,3) = dt * 2.0d+00 * ro * ro * u(3,i,j,k) * (tx1
& * (c34 - c1345) + ty1 * (r43 * c34 - c1345) + tz1 * (c34 - c1345)
&)
tmat(5,4) = dt * 2.0d+00 * ro * ro * u(4,i,j,k) * (tx1
& * (c34 - c1345) + ty1 * (c34 - c1345) + tz1 * (r43 * c34 - c1345)
&)
tmat(5,5) = 1.0d+00 + dt * 2.0d+00 * (tx1 + ty1 + tz1)
& * c1345 * ro + dt * 2.0d+00 * (tx1 * dx5 + ty1 * dy5 + tz1 * dz5)
tmp1 = 1.0d+00 / tmat(1,1)
tmp2 = tmp1 * tmat(2,1)
tmat(2,2) = tmat(2,2) - tmp2 * tmat(1,2)
tmat(2,3) = tmat(2,3) - tmp2 * tmat(1,3)
tmat(2,4) = tmat(2,4) - tmp2 * tmat(1,4)
tmat(2,5) = tmat(2,5) - tmp2 * tmat(1,5)
tv(2) = tv(2) - tv(1) * tmp2
tmp2 = tmp1 * tmat(3,1)
tmat(3,2) = tmat(3,2) - tmp2 * tmat(1,2)
tmat(3,3) = tmat(3,3) - tmp2 * tmat(1,3)
tmat(3,4) = tmat(3,4) - tmp2 * tmat(1,4)
tmat(3,5) = tmat(3,5) - tmp2 * tmat(1,5)
tv(3) = tv(3) - tv(1) * tmp2
tmp2 = tmp1 * tmat(4,1)
tmat(4,2) = tmat(4,2) - tmp2 * tmat(1,2)
tmat(4,3) = tmat(4,3) - tmp2 * tmat(1,3)
tmat(4,4) = tmat(4,4) - tmp2 * tmat(1,4)
tmat(4,5) = tmat(4,5) - tmp2 * tmat(1,5)
tv(4) = tv(4) - tv(1) * tmp2
tmp2 = tmp1 * tmat(5,1)
tmat(5,2) = tmat(5,2) - tmp2 * tmat(1,2)
tmat(5,3) = tmat(5,3) - tmp2 * tmat(1,3)
tmat(5,4) = tmat(5,4) - tmp2 * tmat(1,4)
tmat(5,5) = tmat(5,5) - tmp2 * tmat(1,5)
tv(5) = tv(5) - tv(1) * tmp2
tmp1 = 1.0d+00 / tmat(2,2)
tmp2 = tmp1 * tmat(3,2)
tmat(3,3) = tmat(3,3) - tmp2 * tmat(2,3)
tmat(3,4) = tmat(3,4) - tmp2 * tmat(2,4)
tmat(3,5) = tmat(3,5) - tmp2 * tmat(2,5)
tv(3) = tv(3) - tv(2) * tmp2
tmp2 = tmp1 * tmat(4,2)
tmat(4,3) = tmat(4,3) - tmp2 * tmat(2,3)
tmat(4,4) = tmat(4,4) - tmp2 * tmat(2,4)
tmat(4,5) = tmat(4,5) - tmp2 * tmat(2,5)
tv(4) = tv(4) - tv(2) * tmp2
tmp2 = tmp1 * tmat(5,2)
tmat(5,3) = tmat(5,3) - tmp2 * tmat(2,3)
tmat(5,4) = tmat(5,4) - tmp2 * tmat(2,4)
tmat(5,5) = tmat(5,5) - tmp2 * tmat(2,5)
tv(5) = tv(5) - tv(2) * tmp2
tmp1 = 1.0d+00 / tmat(3,3)
tmp2 = tmp1 * tmat(4,3)
tmat(4,4) = tmat(4,4) - tmp2 * tmat(3,4)
tmat(4,5) = tmat(4,5) - tmp2 * tmat(3,5)
tv(4) = tv(4) - tv(3) * tmp2
tmp2 = tmp1 * tmat(5,3)
tmat(5,4) = tmat(5,4) - tmp2 * tmat(3,4)
tmat(5,5) = tmat(5,5) - tmp2 * tmat(3,5)
tv(5) = tv(5) - tv(3) * tmp2
tmp1 = 1.0d+00 / tmat(4,4)
tmp2 = tmp1 * tmat(5,4)
tmat(5,5) = tmat(5,5) - tmp2 * tmat(4,5)
tv(5) = tv(5) - tv(4) * tmp2
rs(5) = tv(5) / tmat(5,5)
tv(4) = tv(4) - tmat(4,5) * rs(5)
rs(4) = tv(4) / tmat(4,4)
tv(3) = tv(3) - tmat(3,4) * rs(4) - tmat(3,5) * rs(5)
rs(3) = tv(3) / tmat(3,3)
tv(2) = tv(2) - tmat(2,3) * rs(3) - tmat(2,4) * rs(4)
&- tmat(2,5) * rs(5)
rs(2) = tv(2) / tmat(2,2)
tv(1) = tv(1) - tmat(1,2) * rs(2) - tmat(1,3) * rs(3)
&- tmat(1,4) * rs(4) - tmat(1,5) * rs(5)
rs(1) = tv(1) / tmat(1,1)
rsd(1,i,j,k) = rs(1)
rsd(2,i,j,k) = rs(2)
rsd(3,i,j,k) = rs(3)
rsd(4,i,j,k) = rs(4)
rsd(5,i,j,k) = rs(5)
enddo
enddo
enddo
!DVM$ PARALLEL (k,j,i) ON rsd(*,i,j,k), PRIVATE (tv,tmat,tmp2,tmp1,rm
!DVM$&k,rmj,rmi,ro),ACROSS (rsd(0:0,0:1,0:1,0:1)),CUDA_BLOCK (16,16)
do k = nz - 1,2,(-(1))
do j = jend,jst,(-(1))
do i = iend,ist,(-(1))
rmk = 1.0d+00 / u(1,i,j,k + 1)
rmj = 1.0d+00 / u(1,i,j + 1,k)
rmi = 1.0d+00 / u(1,i + 1,j,k)
ro = 1.0d+00 / u(1,i,j,k)
tv(1) = omega * ((-(dt)) * tz1 * dz1 * rsd(1,i,j,k + 1
&) + 0.0d+00 * rsd(2,i,j,k + 1) + 0.0d+00 * rsd(3,i,j,k + 1) + dt *
& tz2 * rsd(4,i,j,k + 1) + 0.0d+00 * rsd(5,i,j,k + 1))
tv(1) = tv(1) + omega * ((-(dt)) * ty1 * dy1 * rsd(1,i
&,j + 1,k) + (-(dt)) * tx1 * dx1 * rsd(1,i + 1,j,k) + 0.0d+00 * rsd
&(2,i,j + 1,k) + dt * tx2 * rsd(2,i + 1,j,k) + dt * ty2 * rsd(3,i,j
& + 1,k) + 0.0d+00 * rsd(3,i + 1,j,k) + 0.0d+00 * rsd(4,i,j + 1,k)
&+ 0.0d+00 * rsd(4,i + 1,j,k) + 0.0d+00 * rsd(5,i,j + 1,k) + 0.0d+0
&0 * rsd(5,i + 1,j,k))
tmat(1,1) = 1.0d+00 + dt * 2.0d+00 * (tx1 * dx1 + ty1
&* dy1 + tz1 * dz1)
tmat(1,2) = 0.0d+00
tmat(1,3) = 0.0d+00
tmat(1,4) = 0.0d+00
tmat(1,5) = 0.0d+00
tv(2) = omega * ((dt * tz2 * ((-(u(2,i,j,k + 1) * u(4,
&i,j,k + 1))) * rmk * rmk) - dt * tz1 * ((-(c34__24)) * rmk * rmk *
& u(2,i,j,k + 1))) * rsd(1,i,j,k + 1) + (dt * tz2 * (u(4,i,j,k + 1)
& * rmk) - dt * tz1 * c34__24 * rmk - dt * tz1 * dz2) * rsd(2,i,j,k
& + 1) + 0.0d+00 * rsd(3,i,j,k + 1) + dt * tz2 * (u(2,i,j,k + 1) *
&rmk) * rsd(4,i,j,k + 1) + 0.0d+00 * rsd(5,i,j,k + 1))
tv(2) = tv(2) + omega * ((dt * ty2 * ((-(u(2,i,j + 1,k
&) * u(3,i,j + 1,k))) * rmj * rmj) - dt * ty1 * ((-(c34__24)) * rmj
& * rmj * u(2,i,j + 1,k))) * rsd(1,i,j + 1,k) + (dt * tx2 * ((-((u(
&2,i + 1,j,k) * rmi)** 2)) + c2 * qs(i + 1,j,k) * rmi) - dt * tx1 *
& ((-(r43__26)) * c34__24 * rmi * rmi * u(2,i + 1,j,k))) * rsd(1,i
&+ 1,j,k) + (dt * ty2 * (u(3,i,j + 1,k) * rmj) - dt * ty1 * (c34__2
&4 * rmj) - dt * ty1 * dy2) * rsd(2,i,j + 1,k) + (dt * tx2 * ((2.0d
&+00 - c2) * (u(2,i + 1,j,k) * rmi)) - dt * tx1 * (r43__26 * c34__2
&4 * rmi) - dt * tx1 * dx2) * rsd(2,i + 1,j,k) + dt * ty2 * (u(2,i,
&j + 1,k) * rmj) * rsd(3,i,j + 1,k) + dt * tx2 * ((-(c2)) * (u(3,i
&+ 1,j,k) * rmi)) * rsd(3,i + 1,j,k) + 0.0d+00 * rsd(4,i,j + 1,k) +
& dt * tx2 * ((-(c2)) * (u(4,i + 1,j,k) * rmi)) * rsd(4,i + 1,j,k)
&+ 0.0d+00 * rsd(5,i,j + 1,k) + dt * tx2 * c2 * rsd(5,i + 1,j,k))
tmat(2,1) = dt * 2.0d+00 * ((-(tx1)) * r43__26 - ty1 -
& tz1) * (c34__24 * ro * ro * u(2,i,j,k))
tmat(2,2) = 1.0d+00 + dt * 2.0d+00 * c34__24 * ro * (t
&x1 * r43__26 + ty1 + tz1) + dt * 2.0d+00 * (tx1 * dx2 + ty1 * dy2
&+ tz1 * dz2)
tmat(2,3) = 0.0d+00
tmat(2,4) = 0.0d+00
tmat(2,5) = 0.0d+00
tv(3) = omega * ((dt * tz2 * ((-(u(3,i,j,k + 1) * u(4,
&i,j,k + 1))) * rmk * rmk) - dt * tz1 * ((-(c34__24)) * rmk * rmk *
& u(3,i,j,k + 1))) * rsd(1,i,j,k + 1) + 0.0d+00 * rsd(2,i,j,k + 1)
&+ (dt * tz2 * (u(4,i,j,k + 1) * rmk) - dt * tz1 * (c34__24 * rmk)
&- dt * tz1 * dz3) * rsd(3,i,j,k + 1) + dt * tz2 * (u(3,i,j,k + 1)
&* rmk) * rsd(4,i,j,k + 1) + 0.0d+00 * rsd(5,i,j,k + 1))
tv(3) = tv(3) + omega * ((dt * ty2 * ((-((u(3,i,j + 1,
&k) * rmj)** 2)) + c2 * (qs(i,j + 1,k) * rmj)) - dt * ty1 * ((-(r43
&__26)) * c34__24 * rmj * rmj * u(3,i,j + 1,k))) * rsd(1,i,j + 1,k)
& + (dt * tx2 * ((-(u(2,i + 1,j,k) * u(3,i + 1,j,k))) * rmi * rmi)
&- dt * tx1 * ((-(c34__24)) * rmi * rmi * u(3,i + 1,j,k))) * rsd(1,
&i + 1,j,k) + dt * ty2 * ((-(c2)) * (u(2,i,j + 1,k) * rmj)) * rsd(2
&,i,j + 1,k) + dt * tx2 * (u(3,i + 1,j,k) * rmi) * rsd(2,i + 1,j,k)
& + (dt * ty2 * ((2.0d+00 - c2) * (u(3,i,j + 1,k) * rmj)) - dt * ty
&1 * (r43__26 * c34__24 * rmj) - dt * ty1 * dy3) * rsd(3,i,j + 1,k)
& + (dt * tx2 * (u(2,i + 1,j,k) * rmi) - dt * tx1 * (c34__24 * rmi)
& - dt * tx1 * dx3) * rsd(3,i + 1,j,k) + dt * ty2 * ((-(c2)) * (u(4
&,i,j + 1,k) * rmj)) * rsd(4,i,j + 1,k) + 0.0d+00 * rsd(4,i + 1,j,k
&) + dt * ty2 * c2 * rsd(5,i,j + 1,k) + 0.0d+00 * rsd(5,i + 1,j,k))
tmat(3,1) = dt * 2.0d+00 * ((-(tx1)) - ty1 * r43__26 -
& tz1) * (c34__24 * ro * ro * u(3,i,j,k))
tmat(3,2) = 0.0d+00
tmat(3,3) = 1.0d+00 + dt * 2.0d+00 * c34__24 * ro * (t
&x1 + ty1 * r43__26 + tz1) + dt * 2.0d+00 * (tx1 * dx3 + ty1 * dy3
&+ tz1 * dz3)
tmat(3,4) = 0.0d+00
tmat(3,5) = 0.0d+00
tv(4) = omega * ((dt * tz2 * ((-((u(4,i,j,k + 1) * rmk
&)** 2)) + c2 * (qs(i,j,k + 1) * rmk)) - dt * tz1 * ((-(r43__26)) *
& c34__24 * rmk * rmk * u(4,i,j,k + 1))) * rsd(1,i,j,k + 1) + dt *
&tz2 * ((-(c2)) * (u(2,i,j,k + 1) * rmk)) * rsd(2,i,j,k + 1) + dt *
& tz2 * ((-(c2)) * (u(3,i,j,k + 1) * rmk)) * rsd(3,i,j,k + 1) + (dt
& * tz2 * (2.0d+00 - c2) * (u(4,i,j,k + 1) * rmk) - dt * tz1 * (r43
&__26 * c34__24 * rmk) - dt * tz1 * dz4) * rsd(4,i,j,k + 1) + dt *
&tz2 * c2 * rsd(5,i,j,k + 1))
tv(4) = tv(4) + omega * ((dt * ty2 * ((-(u(3,i,j + 1,k
&) * u(4,i,j + 1,k))) * rmj * rmj) - dt * ty1 * ((-(c34__24)) * rmj
& * rmj * u(4,i,j + 1,k))) * rsd(1,i,j + 1,k) + (dt * tx2 * ((-(u(2
&,i + 1,j,k) * u(4,i + 1,j,k))) * rmi * rmi) - dt * tx1 * ((-(c34__
&24)) * rmi * rmi * u(4,i + 1,j,k))) * rsd(1,i + 1,j,k) + 0.0d+00 *
& rsd(2,i,j + 1,k) + dt * tx2 * (u(4,i + 1,j,k) * rmi) * rsd(2,i +
&1,j,k) + dt * ty2 * (u(4,i,j + 1,k) * rmj) * rsd(3,i,j + 1,k) + 0.
&0d+00 * rsd(3,i + 1,j,k) + (dt * ty2 * (u(3,i,j + 1,k) * rmj) - dt
& * ty1 * (c34__24 * rmj) - dt * ty1 * dy4) * rsd(4,i,j + 1,k) + (d
&t * tx2 * (u(2,i + 1,j,k) * rmi) - dt * tx1 * (c34__24 * rmi) - dt
& * tx1 * dx4) * rsd(4,i + 1,j,k) + 0.0d+00 * rsd(5,i,j + 1,k) + 0.
&0d+00 * rsd(5,i + 1,j,k))
tmat(4,1) = dt * 2.0d+00 * ((-(tx1)) - ty1 - tz1 * r43
&__26) * (c34__24 * ro * ro * u(4,i,j,k))
tmat(4,2) = 0.0d+00
tmat(4,3) = 0.0d+00
tmat(4,4) = 1.0d+00 + dt * 2.0d+00 * c34__24 * ro * (t
&x1 + ty1 + tz1 * r43__26) + dt * 2.0d+00 * (tx1 * dx4 + ty1 * dy4
&+ tz1 * dz4)
tmat(4,5) = 0.0d+00
tv(5) = omega * ((dt * tz2 * ((c2 * 2.0d0 * qs(i,j,k +
& 1) - c1 * u(5,i,j,k + 1)) * (u(4,i,j,k + 1) * rmk * rmk)) - dt *
&tz1 * ((-(c34__24 - c1345__25)) * rmk * rmk * rmk * u(2,i,j,k + 1)
&** 2 - (c34__24 - c1345__25) * rmk * rmk * rmk * u(3,i,j,k + 1)**
&2 - (r43__26 * c34__24 - c1345__25) * rmk * rmk * rmk * u(4,i,j,k
&+ 1)** 2 - c1345__25 * rmk * rmk * u(5,i,j,k + 1))) * rsd(1,i,j,k
&+ 1) + (dt * tz2 * ((-(c2)) * (u(2,i,j,k + 1) * u(4,i,j,k + 1)) *
&rmk * rmk) - dt * tz1 * (c34__24 - c1345__25) * rmk * rmk * u(2,i,
&j,k + 1)) * rsd(2,i,j,k + 1) + (dt * tz2 * ((-(c2)) * (u(3,i,j,k +
& 1) * u(4,i,j,k + 1)) * rmk * rmk) - dt * tz1 * (c34__24 - c1345__
&25) * rmk * rmk * u(3,i,j,k + 1)) * rsd(3,i,j,k + 1) + (dt * tz2 *
& (c1 * (u(5,i,j,k + 1) * rmk) - c2 * (qs(i,j,k + 1) * rmk + u(4,i,
&j,k + 1) * u(4,i,j,k + 1) * rmk * rmk)) - dt * tz1 * (r43__26 * c3
&4__24 - c1345__25) * rmk * rmk * u(4,i,j,k + 1)) * rsd(4,i,j,k + 1
&) + (dt * tz2 * (c1 * (u(4,i,j,k + 1) * rmk)) - dt * tz1 * c1345__
&25 * rmk - dt * tz1 * dz5) * rsd(5,i,j,k + 1))
tv(5) = tv(5) + omega * ((dt * ty2 * ((c2 * 2.0d0 * qs
&(i,j + 1,k) - c1 * u(5,i,j + 1,k)) * (u(3,i,j + 1,k) * rmj * rmj))
& - dt * ty1 * ((-(c34__24 - c1345__25)) * rmj * rmj * rmj * u(2,i,
&j + 1,k)** 2 - (r43__26 * c34__24 - c1345__25) * rmj * rmj * rmj *
& u(3,i,j + 1,k)** 2 - (c34__24 - c1345__25) * rmj * rmj * rmj * u(
&4,i,j + 1,k)** 2 - c1345__25 * rmj * rmj * u(5,i,j + 1,k))) * rsd(
&1,i,j + 1,k) + (dt * tx2 * ((c2 * 2.0d0 * qs(i + 1,j,k) - c1 * u(5
&,i + 1,j,k)) * (u(2,i + 1,j,k) * rmi * rmi)) - dt * tx1 * ((-(r43_
&_26 * c34__24 - c1345__25)) * rmi * rmi * rmi * u(2,i + 1,j,k)** 2
& - (c34__24 - c1345__25) * rmi * rmi * rmi * u(3,i + 1,j,k)** 2 -
&(c34__24 - c1345__25) * rmi * rmi * rmi * u(4,i + 1,j,k)** 2 - c13
&45__25 * rmi * rmi * u(5,i + 1,j,k))) * rsd(1,i + 1,j,k) + (dt * t
&y2 * ((-(c2)) * (u(2,i,j + 1,k) * u(3,i,j + 1,k)) * rmj * rmj) - d
&t * ty1 * (c34__24 - c1345__25) * rmj * rmj * u(2,i,j + 1,k)) * rs
&d(2,i,j + 1,k) + (dt * tx2 * (c1 * (u(5,i + 1,j,k) * rmi) - c2 * (
&u(2,i + 1,j,k) * u(2,i + 1,j,k) * rmi * rmi + qs(i + 1,j,k) * rmi)
&) - dt * tx1 * (r43__26 * c34__24 - c1345__25) * rmi * rmi * u(2,i
& + 1,j,k)) * rsd(2,i + 1,j,k) + (dt * ty2 * (c1 * (u(5,i,j + 1,k)
&* rmj) - c2 * (qs(i,j + 1,k) * rmj + u(3,i,j + 1,k) * u(3,i,j + 1,
&k) * rmj * rmj)) - dt * ty1 * (r43__26 * c34__24 - c1345__25) * rm
&j * rmj * u(3,i,j + 1,k)) * rsd(3,i,j + 1,k) + (dt * tx2 * ((-(c2)
&) * (u(3,i + 1,j,k) * u(2,i + 1,j,k)) * rmi * rmi) - dt * tx1 * (c
&34__24 - c1345__25) * rmi * rmi * u(3,i + 1,j,k)) * rsd(3,i + 1,j,
&k) + (dt * ty2 * ((-(c2)) * (u(3,i,j + 1,k) * u(4,i,j + 1,k)) * rm
&j * rmj) - dt * ty1 * (c34__24 - c1345__25) * rmj * rmj * u(4,i,j
&+ 1,k)) * rsd(4,i,j + 1,k) + (dt * tx2 * ((-(c2)) * (u(4,i + 1,j,k
&) * u(2,i + 1,j,k)) * rmi * rmi) - dt * tx1 * (c34__24 - c1345__25
&) * rmi * rmi * u(4,i + 1,j,k)) * rsd(4,i + 1,j,k) + (dt * ty2 * (
&c1 * (u(3,i,j + 1,k) * rmj)) - dt * ty1 * c1345__25 * rmj - dt * t
&y1 * dy5) * rsd(5,i,j + 1,k) + (dt * tx2 * (c1 * (u(2,i + 1,j,k) *
& rmi)) - dt * tx1 * c1345__25 * rmi - dt * tx1 * dx5) * rsd(5,i +
&1,j,k))
tmat(5,1) = (-(dt)) * 2.0d+00 * (((tx1 * (r43__26 * c3
&4__24 - c1345__25) + ty1 * (c34__24 - c1345__25) + tz1 * (c34__24
&- c1345__25)) * u(2,i,j,k)** 2 + (tx1 * (c34__24 - c1345__25) + ty
&1 * (r43__26 * c34__24 - c1345__25) + tz1 * (c34__24 - c1345__25))
& * u(3,i,j,k)** 2 + (tx1 * (c34__24 - c1345__25) + ty1 * (c34__24
&- c1345__25) + tz1 * (r43__26 * c34__24 - c1345__25)) * u(4,i,j,k)
&** 2) * ro * ro * ro + (tx1 + ty1 + tz1) * c1345__25 * ro * ro * u
&(5,i,j,k))
tmat(5,2) = dt * 2.0d+00 * (tx1 * (r43__26 * c34__24 -
& c1345__25) + ty1 * (c34__24 - c1345__25) + tz1 * (c34__24 - c1345
&__25)) * ro * ro * u(2,i,j,k)
tmat(5,3) = dt * 2.0d+00 * (tx1 * (c34__24 - c1345__25
&) + ty1 * (r43__26 * c34__24 - c1345__25) + tz1 * (c34__24 - c1345
&__25)) * ro * ro * u(3,i,j,k)
tmat(5,4) = dt * 2.0d+00 * (tx1 * (c34__24 - c1345__25
&) + ty1 * (c34__24 - c1345__25) + tz1 * (r43__26 * c34__24 - c1345
&__25)) * ro * ro * u(4,i,j,k)
tmat(5,5) = 1.0d+00 + dt * 2.0d+00 * (tx1 + ty1 + tz1)
& * c1345__25 * ro + dt * 2.0d+00 * (tx1 * dx5 + ty1 * dy5 + tz1 *
&dz5)
tmp1 = 1.0d+00 / tmat(1,1)
tmp2 = tmp1 * tmat(2,1)
tmat(2,2) = tmat(2,2) - tmp2 * tmat(1,2)
tmat(2,3) = tmat(2,3) - tmp2 * tmat(1,3)
tmat(2,4) = tmat(2,4) - tmp2 * tmat(1,4)
tmat(2,5) = tmat(2,5) - tmp2 * tmat(1,5)
tv(2) = tv(2) - tv(1) * tmp2
tmp2 = tmp1 * tmat(3,1)
tmat(3,2) = tmat(3,2) - tmp2 * tmat(1,2)
tmat(3,3) = tmat(3,3) - tmp2 * tmat(1,3)
tmat(3,4) = tmat(3,4) - tmp2 * tmat(1,4)
tmat(3,5) = tmat(3,5) - tmp2 * tmat(1,5)
tv(3) = tv(3) - tv(1) * tmp2
tmp2 = tmp1 * tmat(4,1)
tmat(4,2) = tmat(4,2) - tmp2 * tmat(1,2)
tmat(4,3) = tmat(4,3) - tmp2 * tmat(1,3)
tmat(4,4) = tmat(4,4) - tmp2 * tmat(1,4)
tmat(4,5) = tmat(4,5) - tmp2 * tmat(1,5)
tv(4) = tv(4) - tv(1) * tmp2
tmp2 = tmp1 * tmat(5,1)
tmat(5,2) = tmat(5,2) - tmp2 * tmat(1,2)
tmat(5,3) = tmat(5,3) - tmp2 * tmat(1,3)
tmat(5,4) = tmat(5,4) - tmp2 * tmat(1,4)
tmat(5,5) = tmat(5,5) - tmp2 * tmat(1,5)
tv(5) = tv(5) - tv(1) * tmp2
tmp1 = 1.0d+00 / tmat(2,2)
tmp2 = tmp1 * tmat(3,2)
tmat(3,3) = tmat(3,3) - tmp2 * tmat(2,3)
tmat(3,4) = tmat(3,4) - tmp2 * tmat(2,4)
tmat(3,5) = tmat(3,5) - tmp2 * tmat(2,5)
tv(3) = tv(3) - tv(2) * tmp2
tmp2 = tmp1 * tmat(4,2)
tmat(4,3) = tmat(4,3) - tmp2 * tmat(2,3)
tmat(4,4) = tmat(4,4) - tmp2 * tmat(2,4)
tmat(4,5) = tmat(4,5) - tmp2 * tmat(2,5)
tv(4) = tv(4) - tv(2) * tmp2
tmp2 = tmp1 * tmat(5,2)
tmat(5,3) = tmat(5,3) - tmp2 * tmat(2,3)
tmat(5,4) = tmat(5,4) - tmp2 * tmat(2,4)
tmat(5,5) = tmat(5,5) - tmp2 * tmat(2,5)
tv(5) = tv(5) - tv(2) * tmp2
tmp1 = 1.0d+00 / tmat(3,3)
tmp2 = tmp1 * tmat(4,3)
tmat(4,4) = tmat(4,4) - tmp2 * tmat(3,4)
tmat(4,5) = tmat(4,5) - tmp2 * tmat(3,5)
tv(4) = tv(4) - tv(3) * tmp2
tmp2 = tmp1 * tmat(5,3)
tmat(5,4) = tmat(5,4) - tmp2 * tmat(3,4)
tmat(5,5) = tmat(5,5) - tmp2 * tmat(3,5)
tv(5) = tv(5) - tv(3) * tmp2
tmp1 = 1.0d+00 / tmat(4,4)
tmp2 = tmp1 * tmat(5,4)
tmat(5,5) = tmat(5,5) - tmp2 * tmat(4,5)
tv(5) = tv(5) - tv(4) * tmp2
tv(5) = tv(5) / tmat(5,5)
tv(4) = tv(4) - tmat(4,5) * tv(5)
tv(4) = tv(4) / tmat(4,4)
tv(3) = tv(3) - tmat(3,4) * tv(4) - tmat(3,5) * tv(5)
tv(3) = tv(3) / tmat(3,3)
tv(2) = tv(2) - tmat(2,3) * tv(3) - tmat(2,4) * tv(4)
&- tmat(2,5) * tv(5)
tv(2) = tv(2) / tmat(2,2)
tv(1) = tv(1) - tmat(1,2) * tv(2) - tmat(1,3) * tv(3)
&- tmat(1,4) * tv(4) - tmat(1,5) * tv(5)
tv(1) = tv(1) / tmat(1,1)
rsd(1,i,j,k) = rsd(1,i,j,k) - tv(1)
rsd(2,i,j,k) = rsd(2,i,j,k) - tv(2)
rsd(3,i,j,k) = rsd(3,i,j,k) - tv(3)
rsd(4,i,j,k) = rsd(4,i,j,k) - tv(4)
rsd(5,i,j,k) = rsd(5,i,j,k) - tv(5)
enddo
enddo
enddo
!---------------------------------------------------------------------
! update the variables
!---------------------------------------------------------------------
! if (timeron) then
! call timer_start(10)
! endif
!DVM$ PARALLEL (k,j,i,m) ON u(m,i,j,k), PRIVATE (j,m,i,k)
do k = 1,nz
do j = jst,jend
do i = ist,iend
do m = 1,5
u(m,i,j,k) = u(m,i,j,k) + tmp * rsd(m,i,j,k)
enddo
enddo
enddo
enddo
!DVM$ END REGION
! if (timeron) then
! call timer_stop(10)
! endif
!---------------------------------------------------------------------
! compute the steady-state residuals
!---------------------------------------------------------------------
!DVM$ END INTERVAL
call rhs()
mod_522_2 = mod (istep,inorm)
!---------------------------------------------------------------------
! compute the max-norms of newton iteration residuals
!---------------------------------------------------------------------
if (mod_522_2 .eq. 0 .or. istep .eq. itmax) then
if (timeron) then
call timer_start(11)
endif
call l2norm(isiz1,isiz2,isiz3,nx0,ny0,nz0,ist,iend,jst,jend,
&rsd,rsdnm)
if (timeron) then
call timer_stop(11)
endif
! if ( ipr .eq. 1 ) then
! write (*,1007) ( rsdnm(m), m = 1, 5 )
! end if
endif
!---------------------------------------------------------------------
! check the newton-iteration residuals against the tolerance levels
!---------------------------------------------------------------------
if (rsdnm(1) .lt. tolrsd(1) .and. rsdnm(2) .lt. tolrsd(2) .and.
& rsdnm(3) .lt. tolrsd(3) .and. rsdnm(4) .lt. tolrsd(4) .and. rsdnm
&(5) .lt. tolrsd(5)) then
! if (ipr .eq. 1 ) then
write (unit = *,fmt = 1004) istep
! end if
goto 900
endif
enddo
900 continue
call timer_stop(1)
maxtime = timer_read (1)
return
1001 FORMAT (1X/5X,'pseudo-time SSOR iteration no.=',I4/)
1004 FORMAT (1X/1X,'convergence was achieved after ',I4, ' pseudo-tim
&e steps' )
1006 FORMAT (1X/1X,'RMS-norm of SSOR-iteration correction ', 'for first
& pde = ',1PE12.5/, 1X,'RMS-norm of SSOR-iteration correction ', '
&for second pde = ',1PE12.5/, 1X,'RMS-norm of SSOR-iteration correc
&tion ', 'for third pde = ',1PE12.5/, 1X,'RMS-norm of SSOR-iterati
&on correction ', 'for fourth pde = ',1PE12.5/, 1X,'RMS-norm of SSO
&R-iteration correction ', 'for fifth pde = ',1PE12.5)
1007 FORMAT (1X/1X,'RMS-norm of steady-state residual for ', 'first pde
& = ',1PE12.5/, 1X,'RMS-norm of steady-state residual for ', 'seco
&nd pde = ',1PE12.5/, 1X,'RMS-norm of steady-state residual for ',
&'third pde = ',1PE12.5/, 1X,'RMS-norm of steady-state residual fo
&r ', 'fourth pde = ',1PE12.5/, 1X,'RMS-norm of steady-state residu
&al for ', 'fifth pde = ',1PE12.5)
end

View File

@@ -0,0 +1,97 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine timer_clear(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
elapsed(n) = 0.0
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine timer_start(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
external elapsed_time
double precision elapsed_time
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
start(n) = elapsed_time()
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine timer_stop(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
external elapsed_time
double precision elapsed_time
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
double precision t, now
now = elapsed_time()
t = now - start(n)
elapsed(n) = elapsed(n) + t
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
double precision function timer_read(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
timer_read = elapsed(n)
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
double precision function elapsed_time()
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
double precision dvtime
elapsed_time = dvtime()
return
end

View File

@@ -0,0 +1,382 @@
! *** generated by SAPFOR with version 1124 and build date: Apr 18 2019 20:56:38
!---------------------------------------------------------------------
!---------------------------------------------------------------------
subroutine verify (xcr, xce, xci, class, verified)
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! verification routine
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!--- applu.incl
!---------------------------------------------------------------------
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! npbparams.h defines parameters that depend on the class and
! number of nodes
!---------------------------------------------------------------------
implicit none
!---------------------------------------------------------------------
! end of include file
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! end of include file
include 'applu.incl'
!---------------------------------------------------------------------
double precision xcr(5),xce(5),xci
double precision xcrref(5),xceref(5),xciref,xcrdif(5),xcedif(5),x
&cidif,epsilon,dtref
integer m
character class
logical verified
!---------------------------------------------------------------------
! tolerance level
!---------------------------------------------------------------------
epsilon = 1.0d-08
class = 'U'
verified = .TRUE.
do m = 1,5
xcrref(m) = 1.0
xceref(m) = 1.0
enddo
xciref = 1.0
if (nx0 .eq. 12 .and. ny0 .eq. 12 .and. nz0 .eq. 12 .and. itmax .e
&q. 50) then
class = 'S'
dtref = 5.0d-1
!---------------------------------------------------------------------
! Reference values of RMS-norms of residual, for the (12X12X12) grid,
! after 50 time steps, with DT = 5.0d-01
!---------------------------------------------------------------------
xcrref(1) = 1.6196343210976702d-02
xcrref(2) = 2.1976745164821318d-03
xcrref(3) = 1.5179927653399185d-03
xcrref(4) = 1.5029584435994323d-03
xcrref(5) = 3.4264073155896461d-02
!---------------------------------------------------------------------
! Reference values of RMS-norms of solution error, for the (12X12X12) grid,
! after 50 time steps, with DT = 5.0d-01
!---------------------------------------------------------------------
xceref(1) = 6.4223319957960924d-04
xceref(2) = 8.4144342047347926d-05
xceref(3) = 5.8588269616485186d-05
xceref(4) = 5.8474222595157350d-05
xceref(5) = 1.3103347914111294d-03
!---------------------------------------------------------------------
! Reference value of surface integral, for the (12X12X12) grid,
! after 50 time steps, with DT = 5.0d-01
!---------------------------------------------------------------------
xciref = 7.8418928865937083d+00
else if (nx0 .eq. 33 .and. ny0 .eq. 33 .and. nz0 .eq. 33 .and. itm
&ax .eq. 300) then
!SPEC95fp size
class = 'W'
dtref = 1.5d-3
!---------------------------------------------------------------------
! Reference values of RMS-norms of residual, for the (33x33x33) grid,
! after 300 time steps, with DT = 1.5d-3
!---------------------------------------------------------------------
xcrref(1) = 0.1236511638192d+02
xcrref(2) = 0.1317228477799d+01
xcrref(3) = 0.2550120713095d+01
xcrref(4) = 0.2326187750252d+01
xcrref(5) = 0.2826799444189d+02
!---------------------------------------------------------------------
! Reference values of RMS-norms of solution error, for the (33X33X33) grid,
!---------------------------------------------------------------------
xceref(1) = 0.4867877144216d+00
xceref(2) = 0.5064652880982d-01
xceref(3) = 0.9281818101960d-01
xceref(4) = 0.8570126542733d-01
xceref(5) = 0.1084277417792d+01
!---------------------------------------------------------------------
! Reference value of surface integral, for the (33X33X33) grid,
! after 300 time steps, with DT = 1.5d-3
!---------------------------------------------------------------------
xciref = 0.1161399311023d+02
else if (nx0 .eq. 64 .and. ny0 .eq. 64 .and. nz0 .eq. 64 .and. itm
&ax .eq. 250) then
class = 'A'
dtref = 2.0d+0
!---------------------------------------------------------------------
! Reference values of RMS-norms of residual, for the (64X64X64) grid,
! after 250 time steps, with DT = 2.0d+00
!---------------------------------------------------------------------
xcrref(1) = 7.7902107606689367d+02
xcrref(2) = 6.3402765259692870d+01
xcrref(3) = 1.9499249727292479d+02
xcrref(4) = 1.7845301160418537d+02
xcrref(5) = 1.8384760349464247d+03
!---------------------------------------------------------------------
! Reference values of RMS-norms of solution error, for the (64X64X64) grid,
! after 250 time steps, with DT = 2.0d+00
!---------------------------------------------------------------------
xceref(1) = 2.9964085685471943d+01
xceref(2) = 2.8194576365003349d+00
xceref(3) = 7.3473412698774742d+00
xceref(4) = 6.7139225687777051d+00
xceref(5) = 7.0715315688392578d+01
!---------------------------------------------------------------------
! Reference value of surface integral, for the (64X64X64) grid,
! after 250 time steps, with DT = 2.0d+00
!---------------------------------------------------------------------
xciref = 2.6030925604886277d+01
else if (nx0 .eq. 102 .and. ny0 .eq. 102 .and. nz0 .eq. 102 .and.
&itmax .eq. 250) then
class = 'B'
dtref = 2.0d+0
!---------------------------------------------------------------------
! Reference values of RMS-norms of residual, for the (102X102X102) grid,
! after 250 time steps, with DT = 2.0d+00
!---------------------------------------------------------------------
xcrref(1) = 3.5532672969982736d+03
xcrref(2) = 2.6214750795310692d+02
xcrref(3) = 8.8333721850952190d+02
xcrref(4) = 7.7812774739425265d+02
xcrref(5) = 7.3087969592545314d+03
!---------------------------------------------------------------------
! Reference values of RMS-norms of solution error, for the (102X102X102)
! grid, after 250 time steps, with DT = 2.0d+00
!---------------------------------------------------------------------
xceref(1) = 1.1401176380212709d+02
xceref(2) = 8.1098963655421574d+00
xceref(3) = 2.8480597317698308d+01
xceref(4) = 2.5905394567832939d+01
xceref(5) = 2.6054907504857413d+02
!---------------------------------------------------------------------
! Reference value of surface integral, for the (102X102X102) grid,
! after 250 time steps, with DT = 2.0d+00
!---------------------------------------------------------------------
xciref = 4.7887162703308227d+01
else if (nx0 .eq. 162 .and. ny0 .eq. 162 .and. nz0 .eq. 162 .and.
&itmax .eq. 250) then
class = 'C'
dtref = 2.0d+0
!---------------------------------------------------------------------
! Reference values of RMS-norms of residual, for the (162X162X162) grid,
! after 250 time steps, with DT = 2.0d+00
!---------------------------------------------------------------------
xcrref(1) = 1.03766980323537846d+04
xcrref(2) = 8.92212458801008552d+02
xcrref(3) = 2.56238814582660871d+03
xcrref(4) = 2.19194343857831427d+03
xcrref(5) = 1.78078057261061185d+04
!---------------------------------------------------------------------
! Reference values of RMS-norms of solution error, for the (162X162X162)
! grid, after 250 time steps, with DT = 2.0d+00
!---------------------------------------------------------------------
xceref(1) = 2.15986399716949279d+02
xceref(2) = 1.55789559239863600d+01
xceref(3) = 5.41318863077207766d+01
xceref(4) = 4.82262643154045421d+01
xceref(5) = 4.55902910043250358d+02
!---------------------------------------------------------------------
! Reference value of surface integral, for the (162X162X162) grid,
! after 250 time steps, with DT = 2.0d+00
!---------------------------------------------------------------------
xciref = 6.66404553572181300d+01
!---------------------------------------------------------------------
! Reference value of surface integral, for the (162X162X162) grid,
! after 250 time steps, with DT = 2.0d+00
!---------------------------------------------------------------------
xciref = 6.66404553572181300d+01
else if (nx0 .eq. 408 .and. ny0 .eq. 408 .and. nz0 .eq. 408 .and.
&itmax .eq. 300) then
class = 'D'
dtref = 1.0d+0
!---------------------------------------------------------------------
! Reference values of RMS-norms of residual, for the (408X408X408) grid,
! after 300 time steps, with DT = 1.0d+00
!---------------------------------------------------------------------
xcrref(1) = 0.4868417937025d+05
xcrref(2) = 0.4696371050071d+04
xcrref(3) = 0.1218114549776d+05
xcrref(4) = 0.1033801493461d+05
xcrref(5) = 0.7142398413817d+05
!---------------------------------------------------------------------
! Reference values of RMS-norms of solution error, for the (408X408X408)
! grid, after 300 time steps, with DT = 1.0d+00
!---------------------------------------------------------------------
xceref(1) = 0.3752393004482d+03
xceref(2) = 0.3084128893659d+02
xceref(3) = 0.9434276905469d+02
xceref(4) = 0.8230686681928d+02
xceref(5) = 0.7002620636210d+03
!---------------------------------------------------------------------
! Reference value of surface integral, for the (408X408X408) grid,
! after 300 time steps, with DT = 1.0d+00
!---------------------------------------------------------------------
xciref = 0.8334101392503d+02
else if (nx0 .eq. 1020 .and. ny0 .eq. 1020 .and. nz0 .eq. 1020 .an
&d. itmax .eq. 300) then
class = 'E'
dtref = 0.5d+0
!---------------------------------------------------------------------
! Reference values of RMS-norms of residual, for the (1020X1020X1020) grid,
! after 300 time steps, with DT = 0.5d+00
!---------------------------------------------------------------------
xcrref(1) = 0.2099641687874d+06
xcrref(2) = 0.2130403143165d+05
xcrref(3) = 0.5319228789371d+05
xcrref(4) = 0.4509761639833d+05
xcrref(5) = 0.2932360006590d+06
!---------------------------------------------------------------------
! Reference values of RMS-norms of solution error, for the (1020X1020X1020)
! grid, after 300 time steps, with DT = 0.5d+00
!---------------------------------------------------------------------
xceref(1) = 0.4800572578333d+03
xceref(2) = 0.4221993400184d+02
xceref(3) = 0.1210851906824d+03
xceref(4) = 0.1047888986770d+03
xceref(5) = 0.8363028257389d+03
!---------------------------------------------------------------------
! Reference value of surface integral, for the (1020X1020X1020) grid,
! after 300 time steps, with DT = 0.5d+00
!---------------------------------------------------------------------
xciref = 0.9512163272273d+02
else
verified = .FALSE.
endif
!---------------------------------------------------------------------
! verification test for residuals if gridsize is one of
! the defined grid sizes above (class .ne. 'U')
!---------------------------------------------------------------------
!---------------------------------------------------------------------
! Compute the difference of solution values and the known reference values.
!---------------------------------------------------------------------
do m = 1,5
xcrdif(m) = dabs ((xcr(m) - xcrref(m)) / xcrref(m))
xcedif(m) = dabs ((xce(m) - xceref(m)) / xceref(m))
enddo
xcidif = dabs ((xci - xciref) / xciref)
!---------------------------------------------------------------------
! Output the comparison of computed results to known cases.
!---------------------------------------------------------------------
if (class .ne. 'U') then
write (unit = *,fmt = 1990) class
1990 format(/, ' Verification being performed for class ', a
&)
write (unit = *,fmt = 2000) epsilon
2000 format(' Accuracy setting for epsilon = ', E20.13)
verified = dabs (dt - dtref) .le. epsilon
if (.not.(verified)) then
class = 'U'
write (unit = *,fmt = 1000) dtref
1000 format(' DT does not match the reference value
& of ', E15.8)
endif
else
write (unit = *,fmt = 1995)
1995 format(' Unknown class')
endif
if (class .ne. 'U') then
write (unit = *,fmt = 2001)
else
write (unit = *,fmt = 2005)
endif
2001 format(' Comparison of RMS-norms of residual')
2005 format(' RMS-norms of residual')
do m = 1,5
if (class .eq. 'U') then
write (unit = *,fmt = 2015) m,xcr(m)
else if (xcrdif(m) .le. epsilon) then
write (unit = *,fmt = 2011) m,xcr(m),xcrref(m),xcrdif(m)
else
verified = .FALSE.
write (unit = *,fmt = 2010) m,xcr(m),xcrref(m),xcrdif(m)
endif
enddo
if (class .ne. 'U') then
write (unit = *,fmt = 2002)
else
write (unit = *,fmt = 2006)
endif
2002 format(' Comparison of RMS-norms of solution error')
2006 format(' RMS-norms of solution error')
do m = 1,5
if (class .eq. 'U') then
write (unit = *,fmt = 2015) m,xce(m)
else if (xcedif(m) .le. epsilon) then
write (unit = *,fmt = 2011) m,xce(m),xceref(m),xcedif(m)
else
verified = .FALSE.
write (unit = *,fmt = 2010) m,xce(m),xceref(m),xcedif(m)
endif
enddo
2010 format(' FAILURE: ', i2, 2x, E20.13, E20.13, E20.13)
2011 format(' ', i2, 2x, E20.13, E20.13, E20.13)
2015 format(' ', i2, 2x, E20.13)
if (class .ne. 'U') then
write (unit = *,fmt = 2025)
else
write (unit = *,fmt = 2026)
endif
2025 format(' Comparison of surface integral')
2026 format(' Surface integral')
if (class .eq. 'U') then
write (unit = *,fmt = 2030) xci
else if (xcidif .le. epsilon) then
write (unit = *,fmt = 2032) xci,xciref,xcidif
else
verified = .FALSE.
write (unit = *,fmt = 2031) xci,xciref,xcidif
endif
2030 format(' ', 4x, E20.13)
2031 format(' FAILURE: ', 4x, E20.13, E20.13, E20.13)
2032 format(' ', 4x, E20.13, E20.13, E20.13)
if (class .eq. 'U') then
write (unit = *,fmt = 2022)
write (unit = *,fmt = 2023)
2022 format(' No reference values provided')
2023 format(' No verification performed')
else if (verified) then
write (unit = *,fmt = 2020)
2020 format(' Verification Successful')
else
write (unit = *,fmt = 2021)
2021 format(' Verification failed')
endif
return
end

View File

@@ -0,0 +1,31 @@
SHELL=/bin/sh
BENCHMARK=mg
BENCHMARKU=MG
include ../config/make.def
include ../sys/make.common
SOURCES = mg.fdv \
mg3p.fdv \
comm3.fdv \
interp.fdv \
norm2u3.fdv \
psinv.fdv \
resid.fdv \
rjrp3.fdv \
setupDVM.fdv \
utilities.fdv \
zran3.fdv
OBJS = ${SOURCES:.fdv=.o}
${PROGRAM}: config $(OBJS)
${FLINK} -o ${PROGRAM} ${OBJS}
%.o: %.fdv npbparams.h globals.h dvmvars.h
${F77} ${FFLAGS} -c -o $@ $<
clean:
rm -f npbparams.h
rm -f *.o *~
rm -f *.cu *.cuf *.c *.f

View File

@@ -0,0 +1,12 @@
@echo off
set CLASS=%1
set OPT=%2
CALL ..\sys\setparams MG %CLASS%
CALL %F77% %OPT% mg 1>out_%CLASS%.txt 2>err_%CLASS%.txt
if exist mg.exe (
copy mg.exe %BIN%\mg.%CLASS%.x.exe
del mg.exe
)

View File

@@ -0,0 +1,88 @@
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>.
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>.
c---------------------------------------------------------------------
c @param double precission :: u(n1 ,n2 ,n3) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c @param integer :: kk - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
subroutine comm3(u,n1,n2,n3,kk)
c---------------------------------------------------------------------
!DVM$ INHERIT u
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c comm3 organizes the communication on all borders
c---------------------------------------------------------------------
implicit none
include 'globals.h'
integer n1, n2, n3, kk, i1, i2, i3
integer blockX, blockY
double precision u(n1,n2,n3)
!DVM$ interval 5
!DVM$ REGION
!, REMOTE_ACCESS (u(n1-1,:,:))
!DVM$ PARALLEL (i3,i2) ON u(1,i2,i3)
CDVM$& ,cuda_block(128)
do i3=2,n3-1
do i2=2,n2-1
u(1,i2,i3) = u(n1-1,i2,i3)
enddo
enddo
!, REMOTE_ACCESS (u(2,:,:))
!DVM$ PARALLEL (i3,i2) ON u(n1,i2,i3 )
CDVM$& ,cuda_block(128)
do i3=2,n3-1
do i2=2,n2-1
u(n1,i2,i3) = u(2,i2,i3)
enddo
enddo
c----------------------------------
!, REMOTE_ACCESS (u(:,n2-1,:))
!DVM$ PARALLEL (i3,i1) ON u(i1,1,i3)
CDVM$& ,cuda_block(128)
do i3=2,n3-1
do i1=1,n1
u(i1,1,i3) = u(i1,n2-1,i3)
enddo
enddo
! , REMOTE_ACCESS (u(:,2,:))
!DVM$ PARALLEL (i3,i1) ON u(i1,n2,i3)
CDVM$& ,cuda_block(128)
do i3=2,n3-1
do i1=1,n1
u(i1,n2,i3) = u(i1,2,i3)
enddo
enddo
c----------------------------------
!, REMOTE_ACCESS (u(:,:,n3-1))
!DVM$ PARALLEL (i2,i1) ON u(i1,i2,1)
CDVM$& ,cuda_block(128)
do i2=1,n2
do i1=1,n1
u(i1,i2,1) = u(i1,i2,n3-1)
enddo
enddo
!, REMOTE_ACCESS (u(:,:,2))
!DVM$ PARALLEL (i2,i1) ON u(i1,i2,n3)
CDVM$& ,cuda_block(128)
do i2=1,n2
do i1=1,n1
u(i1,i2,n3) = u(i1,i2,2)
enddo
enddo
!DVM$ END REGION
!DVM$ end interval
if (timeron) call timer_stop(T_comm3)
return
end

View File

@@ -0,0 +1,57 @@
c---------------------------------------------------------------------
c FDVM specifications
c---------------------------------------------------------------------
integer p_u_ir(maxlevel) !p_u_ir(k) u(ir(k))
integer p_r_ir(maxlevel) !p_u_ir(k) r(ir(k))
integer pu !pu u(lt) p_u_ir(lt)
integer pr !pr r(lt) p_r_ir(lt)
integer pv !pv v
integer p_curr_u_k !p_curr_u_k u(k)
integer p_curr_u_j !p_curr_u_j u(j)
integer p_curr_r_k !p_curr_r_k r(k)
integer p_curr_r_j !p_curr_r_j r(j)
common /pointers/ p_u_ir, p_r_ir
common /pointers/ pu, pr, pv
common /pointers/ p_curr_r_j, p_curr_r_k
common /pointers/ p_curr_u_j, p_curr_u_k
CDVM$ DOUBLE PRECISION, POINTER(:,:,:) :: p_u_ir, p_r_ir,
CDVM$& pu, pr, pv,
CDVM$& p_curr_r_j, p_curr_r_k,
CDVM$& p_curr_u_j, p_curr_u_k
CDVM$ ALIGN :: pu, pr, pv,
CDVM$& p_u_ir, p_r_ir,
CDVM$& p_curr_r_k, p_curr_r_j,
CDVM$& p_curr_u_k, p_curr_u_j
CDVM$ DYNAMIC p_u_ir, p_r_ir,
CDVM$& pu, pr, pv,
CDVM$& p_curr_r_j, p_curr_r_k,
CDVM$& p_curr_u_j, p_curr_u_k
CDVM$ SHADOW pu(1:1,1:1,1:1)
CDVM$ SHADOW pr(1:1,1:1,1:1)
CDVM$ SHADOW p_curr_r_k(1:1,1:1,1:1)
CDVM$ SHADOW p_curr_u_k(1:1,1:1,1:1)
CDVM$ SHADOW p_curr_u_j(1:1,1:1,1:1)
CDVM$ TEMPLATE EXT (nv1, nv2, nv3)
CDVM$ DISTRIBUTE EXT (BLOCK, BLOCK, BLOCK)
c---------------------------------------------------------------------
c Distribution from programm
c---------------------------------------------------------------------
double precision u(nr), r(nr)
double precision v(nv)
common /noautom/ u,r,v
CDVM$ HEAP u, r, v
double precision a(0:3), c(0:3)
common /coefficients/ a,c
CDVM$ DISTRIBUTE (*) :: a, c

View File

@@ -0,0 +1,68 @@
c---------------------------------------------------------------------
c Parameter lm (declared and set in "npbparams.h") is the log-base2 of
c the edge size max for the partition on a given node, so must be changed
c either to save space (if running a small case) or made bigger for larger
c cases, for example, 512^3. Thus lm=7 means that the largest dimension
c of a partition that can be solved on a node is 2^7 = 128. lm is set
c automatically in npbparams.h
c Parameters ndim1, ndim2, ndim3 are the local problem dimensions.
c---------------------------------------------------------------------
include 'npbparams.h'
integer nm ! actual dimension including ghost cells for communications
c *** type of nv, nr and ir is set in npbparams.h
c > , nv ! size of rhs array
c > , nr ! size of residual array
> , nm2 ! size of communication buffer
> , maxlevel! maximum number of levels
integer nv1, nv2, nv3
parameter( nv1=one*(2+2**ndim1) )
parameter( nv2=one*(2+2**ndim2) )
parameter( nv3=one*(2+2**ndim3) )
parameter( nm=2+2**lm, maxlevel=(lt_default+1) )
parameter( nm2=2*nm*nm)
parameter( nv=nv1*nv2*nv3/one/one )
parameter( nr = ((nv+nm**2+5*nm+7*lm+6)/7)*8 )
c---------------------------------------------------------------------
integer nbr(3,-1:1,maxlevel), msg_type(3,-1:1)
integer msg_id(3,-1:1,2),nx(maxlevel),ny(maxlevel),nz(maxlevel)
common /mg3/ nbr,msg_type,msg_id,nx,ny,nz
character class
common /ClassType/class
integer debug_vec(0:7)
common /my_debug/ debug_vec
integer m1(maxlevel), m2(maxlevel), m3(maxlevel)
integer lt, lb
common /fap/ ir(maxlevel),m1,m2,m3,lt,lb
logical ver
! FALSE for GPU and TRUE for CPU
parameter (ver = .false. )
logical dead(maxlevel), give_ex(3,maxlevel), take_ex(3,maxlevel)
common /comm_ex/ dead, give_ex, take_ex
c---------------------------------------------------------------------
c Set at m=1024, can handle cases up to 1024^3 case
c---------------------------------------------------------------------
integer m
c parameter( m=1037 )
parameter( m=nm+1 )
double precision buff(nm2,4)
common /buffer/ buff
logical timeron
common /timers/ timeron
integer T_init, T_bench, T_psinv, T_resid, T_rprj3, T_interp,
> T_norm2, T_mg3P, T_resid2, T_comm3, T_last
parameter (T_init=1, T_bench=2, T_mg3P=3,
> T_psinv=4, T_resid=5, T_resid2=6, T_rprj3=7,
> T_interp=8, T_norm2=9, T_comm3=10, T_last=10)

View File

@@ -0,0 +1,169 @@
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c u(h) = u(h) + Q u(H)
c <20><><EFBFBD> H = 2h - <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>,
c Q - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c u - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>,
c <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c @param double precission :: z(mm1,mm2,mm3) ? u(H) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c @param double precission :: u(n1 ,n2 ,n3 ) ? u(h) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c @param integer :: k - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
subroutine interp( z,mm1,mm2,mm3,u,n1,n2,n3,k )
c---------------------------------------------------------------------
!DVM$ INHERIT z,u
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c interp adds the trilinear interpolation of the correction
c from the coarser grid to the current approximation: u = u + Qu'
c
c Observe that this implementation costs 16A + 4M, where
c A and M denote the costs of Addition and Multiplication.
c Note that this vectorizes, and is also fine for cache
c based machines. Vector machines may get slightly better
c performance however, with 8 separate "do i1" loops, rather than 4.
c---------------------------------------------------------------------
implicit none
include 'globals.h'
integer mm1, mm2, mm3, n1, n2, n3,k
double precision z(mm1,mm2,mm3),u(n1,n2,n3),z1,z1_p1,z2,z2_p1
double precision z3,z3_p1,z4,z4_p1
integer i3, i2, i1, d1, d2, d3, t1, t2, t3
!DVM$ interval 1
if( n1 .ne. 3 .and. n2 .ne. 3 .and. n3 .ne. 3 ) then
if ( ver ) then
!DVM$ REGION
!DVM$ PARALLEL (i3,i2) ON u(*, 2*i2, 2*i3),
!DVM$& SHADOW_RENEW(z(CORNER)), cuda_block(32,6)
!DVM$& ,private(i1,z1,z1_p1, z2,z2_p1, z3,z3_p1, z4,z4_p1)
do i3=1,mm3-1
do i2=1,mm2-1
z1 = z(1,i2,i3)
z2 = z(1,i2+1,i3+1)
z3 = z(1,i2,i3+1)
z4 = z(1,i2+1,i3)
do i1=1, mm1-1
z1_p1 = z(i1+1,i2,i3)
z2_p1 = z(i1+1,i2+1,i3+1)
z3_p1 = z(i1+1,i2,i3+1)
z4_p1 = z(i1+1,i2+1,i3)
u(2*i1-1,2*i2-1,2*i3-1) = u(2*i1-1,2*i2-1,2*i3-1) + z1
u(2*i1,2*i2-1,2*i3-1) = u(2*i1,2*i2-1,2*i3-1) +
& 0.5d0 * ( z1_p1 + z1 )
u(2*i1-1,2*i2,2*i3-1) = u(2*i1-1,2*i2,2*i3-1) +
& 0.5d0 * ( z4 + z1 )
u(2*i1,2*i2,2*i3-1) = u(2*i1,2*i2,2*i3-1) +
& 0.25d0*( z4 + z1 + z4_p1 + z1_p1)
u(2*i1-1,2*i2-1,2*i3) = u(2*i1-1,2*i2-1,2*i3) +
& +0.5d0 * ( z3 + z1 )
u(2*i1,2*i2-1,2*i3) = u(2*i1,2*i2-1,2*i3) +
& 0.25d0*( z3 + z1 + z3_p1 + z1_p1)
u(2*i1-1,2*i2,2*i3) = u(2*i1-1,2*i2,2*i3) +
& 0.25d0* (z2 + z3 + z4 + z1 )
u(2*i1,2*i2,2*i3) = u(2*i1,2*i2,2*i3) +
& 0.125d0*( z2 + z3 + z4 + z1 + z2_p1 + z3_p1
& + z4_p1 + z1_p1 )
z1 = z1_p1
z2 = z2_p1
z3 = z3_p1
z4 = z4_p1
enddo
enddo
enddo
!DVM$ END REGION
else
!DVM$ REGION
!DVM$ PARALLEL (i3,i1) ON u(2*i1, *, 2*i3),
!DVM$& SHADOW_RENEW(z(CORNER)), cuda_block(32,6)
!DVM$& ,private(i2,z1,z1_p1, z2,z2_p1, z3,z3_p1, z4,z4_p1)
do i3=1,mm3-1
do i1=1, mm1-1
z1 = z(i1,1,i3)
z2 = z(i1+1,1,i3)
z3 = z(i1,1,i3+1)
z4 = z(i1+1,1,i3+1)
do i2=1,mm2-1
z1_p1 = z(i1,i2+1,i3)
z2_p1 = z(i1+1,i2+1,i3)
z3_p1 = z(i1,i2+1,i3+1)
z4_p1 = z(i1+1,i2+1,i3+1)
u(2*i1-1,2*i2-1,2*i3-1) = u(2*i1-1,2*i2-1,2*i3-1) + z1
u(2*i1,2*i2-1,2*i3-1) = u(2*i1,2*i2-1,2*i3-1) +
& 0.5d0 * ( z2 + z1 )
u(2*i1-1,2*i2,2*i3-1) = u(2*i1-1,2*i2,2*i3-1) +
& 0.5d0 * ( z1_p1 + z1 )
u(2*i1,2*i2,2*i3-1) = u(2*i1,2*i2,2*i3-1) +
& 0.25d0*( z1_p1 + z1 + z2_p1 + z2)
u(2*i1-1,2*i2-1,2*i3) = u(2*i1-1,2*i2-1,2*i3) +
& +0.5d0 * ( z3 + z1 )
u(2*i1,2*i2-1,2*i3) = u(2*i1,2*i2-1,2*i3) +
& 0.25d0*( z3 + z1 + z4 + z2)
u(2*i1-1,2*i2,2*i3) = u(2*i1-1,2*i2,2*i3) +
& 0.25d0* (z3_p1 + z3 + z1_p1 + z1 )
u(2*i1,2*i2,2*i3) = u(2*i1,2*i2,2*i3) +
& 0.125d0*( z3_p1 + z3 + z1_p1 + z1 +
& z4_p1 + z4 + z2_p1 + z2 )
z1 = z1_p1
z2 = z2_p1
z3 = z3_p1
z4 = z4_p1
enddo
enddo
enddo
!DVM$ END REGION
endif
else
if(n1.eq.3) then; d1 = 2; t1 = 1; else; d1 = 1; t1 = 0; endif
if(n2.eq.3) then; d2 = 2; t2 = 1; else; d2 = 1; t2 = 0; endif
if(n3.eq.3) then; d3 = 2; t3 = 1; else; d3 = 1; t3 = 0; endif
!DVM$ REGION
!DVM$ PARALLEL (i3,i2,i1) ON u(2*i1-d1,2*i2-d2,2*i3-d3),
!DVM$& SHADOW_RENEW(z(CORNER)), PRIVATE(i3,i2,i1)
!DVM$& ,cuda_block(32,6,1)
do i3=d3,mm3-1
do i2=d2,mm2-1
do i1=d1,mm1-1
u(2*i1-d1,2*i2-d2,2*i3-d3)=u(2*i1-d1,2*i2-d2,2*i3-d3)
& +z(i1,i2,i3)
u(2*i1-t1,2*i2-d2,2*i3-d3)=u(2*i1-t1,2*i2-d2,2*i3-d3)
& +0.5D0*(z(i1+1,i2,i3)+z(i1,i2,i3))
u(2*i1-d1,2*i2-t2,2*i3-d3)=u(2*i1-d1,2*i2-t2,2*i3-d3)
& +0.5D0*(z(i1,i2+1,i3)+z(i1,i2,i3))
u(2*i1-t1,2*i2-t2,2*i3-d3)=u(2*i1-t1,2*i2-t2,2*i3-d3)
& +0.25D0*(z(i1+1,i2+1,i3)+z(i1+1,i2,i3)
& +z(i1, i2+1,i3)+z(i1, i2,i3))
u(2*i1-d1,2*i2-d2,2*i3-t3)=u(2*i1-d1,2*i2-d2,2*i3-t3)
& +0.5D0*(z(i1,i2,i3+1)+z(i1,i2,i3))
u(2*i1-t1,2*i2-d2,2*i3-t3)=u(2*i1-t1,2*i2-d2,2*i3-t3)
& +0.25D0*(z(i1+1,i2,i3+1)+z(i1,i2,i3+1)
& +z(i1+1,i2,i3 )+z(i1,i2,i3 ))
u(2*i1-d1,2*i2-t2,2*i3-t3)=u(2*i1-d1,2*i2-t2,2*i3-t3)
& +0.25D0*(z(i1,i2+1,i3+1)+z(i1,i2,i3+1)
& +z(i1,i2+1,i3 )+z(i1,i2,i3 ))
u(2*i1-t1,2*i2-t2,2*i3-t3)=u(2*i1-t1,2*i2-t2,2*i3-t3)
& +0.125D0*(z(i1+1,i2+1,i3+1)+z(i1+1,i2,i3+1)
& +z(i1 ,i2+1,i3+1)+z(i1 ,i2,i3+1)
& +z(i1+1,i2+1,i3 )+z(i1+1,i2,i3 )
& +z(i1 ,i2+1,i3 )+z(i1 ,i2,i3 ))
enddo
enddo
enddo
!DVM$ END REGION
endif
!DVM$ end interval
return
end

View File

@@ -0,0 +1,369 @@
!-------------------------------------------------------------------------!
! !
! N A S P A R A L L E L B E N C H M A R K S 3.3 !
! !
! D V M V E R S I O N !
! !
! M G !
! !
!-------------------------------------------------------------------------!
! !
! This benchmark is an OpenMP version of the NPB MG code. !
! It is described in NAS Technical Report 99-011. !
! !
! Permission to use, copy, distribute and modify this software !
! for any purpose with or without fee is hereby granted. We !
! request, however, that all derived work reference the NAS !
! Parallel Benchmarks 3.3. This software is provided "as is" !
! without express or implied warranty. !
! !
! Information on NPB 3.3, including the technical report, the !
! original specifications, source code, results and information !
! on how to submit new results, is available at: !
! !
! http://www.nas.nasa.gov/Software/NPB/ !
! !
! Send comments or suggestions to npb@nas.nasa.gov !
! !
! NAS Parallel Benchmarks Group !
! NASA Ames Research Center !
! Mail Stop: T27A-1 !
! Moffett Field, CA 94035-1000 !
! !
! E-mail: npb@nas.nasa.gov !
! Fax: (650) 604-3957 !
! !
!-------------------------------------------------------------------------!
c---------------------------------------------------------------------
c
c Authors:
c Original:
c E. Barszcz
c P. Frederickson
c A. Woo
c M. Yarrow
c H. Jin
c DVM/DVMH vesion:
c A. Shubert
c Optimized for DVM/DVMH:
c A. Kolganov
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c Программа решающая уравнение Пуасона многосеточным методом (V-цикл)
c---------------------------------------------------------------------
program mg
c---------------------------------------------------------------------
implicit none
include 'globals.h'
include 'dvmvars.h'
c---------------------------------------------------------------------------c
c k is the current level. It is passed down through subroutine args
c and is NOT global. it is the current iteration
c---------------------------------------------------------------------------c
integer k, it, pdim, pi
external timer_read
double precision t, tinit, mflops, timer_read
c---------------------------------------------------------------------------c
c These arrays are in common because they are quite large
c and probably shouldn't be allocated on the stack. They
c are always passed as subroutine args.
c---------------------------------------------------------------------------c
double precision rnm2, rnmu, epsilon
integer n1, n2, n3, nit
double precision nn, verify_value, err, t_1,t_2
logical verified
integer i, fstatus
character t_names(t_last)*8
double precision tmax, elapsed_time
do i = T_init, T_last
call timer_clear(i)
end do
c---------------------------------------------------------------------
c Read in and broadcast input data
c---------------------------------------------------------------------
open(unit=7,file='timer.flag', status='old', iostat=fstatus)
if (fstatus .eq. 0) then
timeron = .true.
t_names(t_init) = 'init'
t_names(t_bench) = 'benchmk'
t_names(t_mg3P) = 'mg3P'
t_names(t_psinv) = 'psinv'
t_names(t_resid) = 'resid'
t_names(t_rprj3) = 'rprj3'
t_names(t_interp) = 'interp'
t_names(t_norm2) = 'norm2'
t_names(t_comm3) = 'comm3'
close(7)
else
timeron = .false.
endif
write (*, 1000)
open(unit=7,file="mg.input", status="old", iostat=fstatus)
if (fstatus .eq. 0) then
write(*,50)
50 format(' Reading from input file mg.input')
read(7,*) lt
read(7,*) nx(lt), ny(lt), nz(lt)
read(7,*) nit
read(7,*) (debug_vec(i),i=0,7)
else
write(*,51)
51 format(' No input file. Using compiled defaults ')
lt = lt_default
nit = nit_default
nx(lt) = nx_default
ny(lt) = ny_default
nz(lt) = nz_default
do i = 0,7
debug_vec(i) = debug_default
end do
endif
if ( (nx(lt) .ne. ny(lt)) .or. (nx(lt) .ne. nz(lt)) ) then
Class = 'U'
else if( nx(lt) .eq. 32 .and. nit .eq. 4 ) then
Class = 'S'
else if( nx(lt) .eq. 128 .and. nit .eq. 4 ) then
Class = 'W'
else if( nx(lt) .eq. 256 .and. nit .eq. 4 ) then
Class = 'A'
else if( nx(lt) .eq. 256 .and. nit .eq. 20 ) then
Class = 'B'
else if( nx(lt) .eq. 512 .and. nit .eq. 20 ) then
Class = 'C'
else if( nx(lt) .eq. 1024 .and. nit .eq. 50 ) then
Class = 'D'
else if( nx(lt) .eq. 2048 .and. nit .eq. 50 ) then
Class = 'E'
else
Class = 'U'
endif
c---------------------------------------------------------------------
c Use these for debug info:
c---------------------------------------------------------------------
c debug_vec(0) = 1 !=> report all norms
c debug_vec(1) = 1 !=> some setup information
c debug_vec(1) = 2 !=> more setup information
c debug_vec(2) = k => at level k or below, show result of resid
c debug_vec(3) = k => at level k or below, show result of psinv
c debug_vec(4) = k => at level k or below, show result of rprj
c debug_vec(5) = k => at level k or below, show result of interp
c debug_vec(6) = 1 => (unused)
c debug_vec(7) = 1 => (unused)
c---------------------------------------------------------------------
a(0) = -8.0D0/3.0D0
a(1) = 0.0D0
a(2) = 1.0D0/6.0D0
a(3) = 1.0D0/12.0D0
if(Class .eq. 'A' .or. Class .eq. 'S'.or. Class .eq.'W') then
c---------------------------------------------------------------------
c Coefficients for the S(a) smoother
c---------------------------------------------------------------------
c(0) = -3.0D0/8.0D0
c(1) = +1.0D0/32.0D0
c(2) = -1.0D0/64.0D0
c(3) = 0.0D0
else
c---------------------------------------------------------------------
c Coefficients for the S(b) smoother
c---------------------------------------------------------------------
c(0) = -3.0D0/17.0D0
c(1) = +1.0D0/33.0D0
c(2) = -1.0D0/61.0D0
c(3) = 0.0D0
endif
lb = 1
k = lt
c**********************************************************************
c**********************************************************************
c********************* START HERE *************************************
c**********************************************************************
c**********************************************************************
call setup(n1,n2,n3,k)
call setupDVM(0)
call timer_start(T_init)
call zero3(u(pu),n1,n2,n3)
call zran3(v(pv),n1,n2,n3,nx(lt),ny(lt),k,class)
write (*, 1001) nx(lt),ny(lt),nz(lt), Class
write (*, 1002) nit
write (*, *)
pdim = PROCESSORS_RANK()
write (*, 310) pdim
do pi=1, pdim
write (*, 311) pi, PROCESSORS_SIZE(pi)
enddo
write (*, *) ' '
310 format(' Processors grid rank: ', i4)
311 format(' Grid dimension [', i4, '] size: ', i4)
1000 format(//,' NAS Parallel Benchmarks (NPB3.3-DVMH)',
> ' - MG Benchmark', /)
1001 format(' Size: ', i4, 'x', i4, 'x', i4, ' (class ', A, ')' )
1002 format(' Iterations: ', i5)
call resid(u(pu),v(pv),r(pr),n1,n2,n3,a,k)
c---------------------------------------------------------------------
c One iteration for startup
c---------------------------------------------------------------------
call mg3P(n1,n2,n3,k)
call resid(u(pu),v(pv),r(pr),n1,n2,n3,a,k)
call norm2u3(r(pr),n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt))
call setup(n1,n2,n3,k)
call zero3(u(pu),n1,n2,n3)
call zran3(v(pv),n1,n2,n3,nx(lt),ny(lt),k,class)
call timer_stop(T_init)
tinit = timer_read(T_init)
write( *,'(A,F15.3,A/)' )
> ' Initialization time: ',tinit, ' seconds'
do i = T_bench, T_last
call timer_clear(i)
end do
call timer_start(T_bench)
call resid(u(pu),v(pv),r(pr),n1,n2,n3,a,k)
call norm2u3(r(pr),n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt))
! временный вызов
! call timer_start(T_bench)
c*****************************************************************
c********************MAIN LOOP ***********************************
c*****************************************************************
do it = 1, nit
if (it.eq.1 .or. it.eq.nit .or. mod(it,5).eq.0) then
write(*,80) it
80 format(' iter ',i3)
endif
call mg3P(n1,n2,n3,k)
call resid(u(pu),v(pv),r(pr),n1,n2,n3,a,k)
enddo
call norm2u3(r(pr),n1,n2,n3,rnm2,rnmu,nx(lt),ny(lt),nz(lt))
call timer_stop(T_bench)
t = timer_read(T_bench)
verified = .FALSE.
verify_value = 0.0
write(*,100)
100 format(/' Benchmark completed ')
epsilon = 1.d-8
if (Class .ne. 'U') then
if(Class.eq.'S') then
verify_value = 0.5307707005734d-04
elseif(Class.eq.'W') then
verify_value = 0.6467329375339d-05
elseif(Class.eq.'A') then
verify_value = 0.2433365309069d-05
elseif(Class.eq.'B') then
verify_value = 0.1800564401355d-05
elseif(Class.eq.'C') then
verify_value = 0.5706732285740d-06
elseif(Class.eq.'D') then
verify_value = 0.1583275060440d-09
elseif(Class.eq.'E') then
verify_value = 0.5630442584711d-10
endif
err = abs( rnm2 - verify_value ) / verify_value
if( err .le. epsilon ) then
verified = .TRUE.
write(*, 200)
write(*, 201) rnm2
write(*, 202) err
200 format(' VERIFICATION SUCCESSFUL ')
201 format(' L2 Norm is ', E20.13)
202 format(' Error is ', E20.13)
else
verified = .FALSE.
write(*, 300)
write(*, 301) rnm2
write(*, 302) verify_value
300 format(' VERIFICATION FAILED')
301 format(' L2 Norm is ', E20.13)
302 format(' The correct L2 Norm is ', E20.13)
endif
else
verified = .FALSE.
write (*, 400)
write (*, 401)
write (*, 201) rnm2
400 format(' Problem size unknown')
401 format(' NO VERIFICATION PERFORMED')
endif
nn = 1.0d0*nx(lt)*ny(lt)*nz(lt)
if( t .ne. 0. ) then
mflops = 58.*nit*nn*1.0D-6 /t
else
mflops = 0.0
endif
call print_results('MG', class, nx(lt), ny(lt), nz(lt),
> nit, t,
> mflops, ' floating point',
> verified, npbversion, compiletime,
> cs1, cs2, cs3, cs4, cs5, cs6, cs7)
600 format( i4, 2e19.12)
c---------------------------------------------------------------------
c More timers
c---------------------------------------------------------------------
if (.not.timeron) goto 999
tmax = timer_read(t_bench)
if (tmax .eq. 0.0) tmax = 1.0
write(*,800)
800 format(' SECTION Time (secs)')
do i=t_bench, t_last
t = timer_read(i)
if (i.eq.t_resid2) then
t = timer_read(T_resid) - t
write(*,820) 'mg-resid', t, t*100./tmax
else
write(*,810) t_names(i), t, t*100./tmax
endif
810 format(2x,a8,':',f9.3,' (',f6.2,'%)')
820 format(' --> ',a8,':',f9.3,' (',f6.2,'%)')
end do
999 continue
end

View File

@@ -0,0 +1,167 @@
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c @param integer d1, d2, d3 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c @param integer k - <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
subroutine VDownIteration(d1, d2, d3, k)
implicit none
include 'globals.h'
include 'dvmvars.h'
integer k
integer d1, d2, d3
! !DVM$ GET_ACTUAL(p_curr_r_k)
!write (*,*) 'R(',k,') down: '
!call printMatrix(r(p_curr_r_k), m1(k),m2(k),m3(k));
!stop
call rprj3(
> r(p_curr_r_k),m1(k),m2(k),m3(k),
> r(p_curr_r_j),m1(k-1),m2(k-1),m3(k-1),
> k, d1, d2, d3
> )
! !DVM$ GET_ACTUAL(p_curr_r_j)
!write (*,*) 'R(',k-1,') down: '
!call printMatrix(r(p_curr_r_j), m1(k-1),m2(k-1),m3(k-1));
!stop
return
end
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c @param integer k - <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
subroutine VUpIteration(k)
implicit none
include 'globals.h'
include 'dvmvars.h'
integer k
integer m1k, m2k, m3k
c---------------------------------------------------------------------
c prolongate from level k-1 to k
c---------------------------------------------------------------------
call zero3(u(p_curr_u_k),m1(k),m2(k),m3(k))
call interp(
> u(p_curr_u_j),m1(k-1),m2(k-1),m3(k-1),
> u(p_curr_u_k),m1(k),m2(k),m3(k),
> k
> )
c---------------------------------------------------------------------
c compute residual for level k
c---------------------------------------------------------------------
call resid(
> u(p_curr_u_k),
> r(p_curr_r_k),
> r(p_curr_r_k),
> m1(k),m2(k),m3(k),
> a,k
> )
c---------------------------------------------------------------------
c apply smoother
c---------------------------------------------------------------------
call psinv(r(p_curr_r_k),u(p_curr_u_k),m1(k),m2(k),m3(k),c,k)
return
end
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
subroutine mg3P(n1,n2,n3,k)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c multigrid V-cycle routine
c---------------------------------------------------------------------
implicit none
include 'globals.h'
include 'dvmvars.h'
integer n1, n2, n3, k
integer j, d1, d2, d3
c---------------------------------------------------------------------
c down cycle.
c restrict the residual from the find grid to the coarse
c---------------------------------------------------------------------
!!DVM$ INTERVAL 1
p_curr_r_j = pr
do k= lt, lb+1 , -1
j = k-1
p_curr_r_k = p_curr_r_j
p_curr_r_j = p_r_ir(j)
if(m1(k).eq.3)then; d1 = 2; else; d1 = 1; endif
if(m2(k).eq.3)then; d2 = 2; else; d2 = 1; endif
if(m3(k).eq.3)then; d3 = 2; else; d3 = 1; endif
call VDownIteration(d1,d2,d3,k)
enddo
!!DVM$ END INTERVAL
c---------------------------------------------------------------------
c compute an approximate solution on the coarsest grid
c---------------------------------------------------------------------
!!DVM$ INTERVAL 5
k = lb
p_curr_u_k = p_u_ir(k)
p_curr_r_k = p_r_ir(k)
call zero3(u(p_curr_u_k),m1(k),m2(k),m3(k))
call psinv(r(p_curr_r_k),u(p_curr_u_k),m1(k),m2(k),m3(k),c,k)
!!DVM$ END INTERVAL
c---------------------------------------------------------------------
c up cycle.
c---------------------------------------------------------------------
!!DVM$ INTERVAL 6
do k = lb+1, lt-1
j = k-1
p_curr_u_j = p_curr_u_k
p_curr_u_k = p_u_ir(k)
p_curr_r_k = p_r_ir(k)
call VUpIteration(k)
enddo
!!DVM$ END INTERVAL
200 continue
j = lt - 1
k = lt
p_curr_u_j = p_u_ir(j)
call interp(u(p_curr_u_j),m1(j) ,m2(j) ,m3(j),
> u(pu),m1(k),m2(k),m3(k),
> k
> )
call resid (u(pu),v(pv),r(pr),m1(k),m2(k),m3(k),a,k)
call psinv (r(pr),u(pu),m1(k),m2(k),m3(k),c,k)
return
end

View File

@@ -0,0 +1,51 @@
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
subroutine norm2u3(r,n1,n2,n3,rnm2,rnmu,nx,ny,nz)
c---------------------------------------------------------------------
!DVM$ INHERIT r
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c norm2u3 evaluates approximations to the L2 norm and the
c uniform (or L-infinity or Chebyshev) norm, under the
c assumption that the boundaries are periodic or zero. Add the
c boundaries in with half weight (quarter weight on the edges
c and eighth weight at the corners) for inhomogeneous boundaries.
c---------------------------------------------------------------------
implicit none
integer n1, n2, n3, nx, ny, nz
double precision rnm2, rnmu, r(n1,n2,n3)
double precision s, a
integer i3, i2, i1
double precision dn
integer T_norm2
parameter (T_norm2=9)
dn = 1.0d0*nx*ny*nz
s=0.0D0
rnmu = 0.0D0
!DVM$ REGION
!DVM$ PARALLEL (i3,i2,i1) ON r(i1,i2,i3),
!DVM$& REDUCTION(SUM(s), MAX(rnmu)), PRIVATE(a)
!DVM$& ,cuda_block(32,4,1)
do i3=2,n3-1
do i2=2,n2-1
do i1=2,n1-1
s=s+r(i1,i2,i3)**2
a=abs(r(i1,i2,i3))
rnmu=dmax1(rnmu,a)
enddo
enddo
enddo
!DVM$ END REGION
!!DVM$ END INTERVAL
rnm2=sqrt( s / dn )
return
end

View File

@@ -0,0 +1,167 @@
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c u(h) = u(h) + C r(h)
c <20><><EFBFBD> C - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>-<2D><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>,
c r - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c h - <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>:
c - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD> V-<2D><><EFBFBD><EFBFBD><EFBFBD>;
c - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c @param double precission :: r(n1,n2,n3) ? r(h) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c @param double precission :: u(n1,n2,n3) ? u(H) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c @param double precission :: <20>(3) ? <20> - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c @param integer :: k - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
subroutine psinv( r,u,n1,n2,n3,c,k)
c---------------------------------------------------------------------
!DVM$ INHERIT r,u,c
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c psinv applies an approximate inverse as smoother: u = u + Cr
c
c This implementation costs 15A + 4M per result, where
c A and M denote the costs of Addition and Multiplication.
c Presuming coefficient c(3) is zero (the NPB assumes this,
c but it is thus not a general case), 2A + 1M may be eliminated,
c resulting in 13A + 3M.
c Note that this vectorizes, and is also fine for cache
c based machines.
c---------------------------------------------------------------------
implicit none
include 'globals.h'
integer n1,n2,n3,k
double precision u(n1,n2,n3),r(n1,n2,n3),c(0:3)
double precision c_0,c_1,c_2, r1,r1_m1,r1_p1, r2,r2_m1,r2_p1
double precision r3,r3_m1,r3_p1, r4,r4_m1,r4_p1, r5,r5_m1,r5_p1
integer i3, i2, i1
if(Class .eq. 'A' .or. Class .eq. 'S'.or. Class .eq.'W') then
c_0 = -3.0D0/8.0D0
c_1 = +1.0D0/32.0D0
c_2 = -1.0D0/64.0D0
else
c_0 = -3.0D0/17.0D0
c_1 = +1.0D0/33.0D0
c_2 = -1.0D0/61.0D0
endif
!DVM$ interval 2
if( ver ) then
!DVM$ REGION
!DVM$ PARALLEL (i3,i2) ON u(*,i2,i3),cuda_block(32,6),
!DVM$& SHADOW_RENEW(r(CORNER)),
!DVM$& private(i1,r1,r1_m1,r1_p1,r2,r2_m1,r2_p1,r3,r3_m1,r3_p1,
!DVM$& r4,r4_m1,r4_p1, r5,r5_m1,r5_p1)
do i3=2,n3-1
do i2=2,n2-1
r1_m1=r(1,i2,i3)
r1=r(2,i2,i3)
r2_m1=r(1,i2-1,i3)
r2=r(2,i2-1,i3)
r3_m1=r(1,i2+1,i3)
r3=r(2,i2+1,i3)
r4_m1=r(1,i2,i3+1)
r4=r(2,i2,i3+1)
r5_m1=r(1,i2,i3-1)
r5=r(2,i2,i3-1)
do i1=2,n1-1
r1_p1=r(i1+1,i2,i3)
r2_p1=r(i1+1,i2-1,i3)
r3_p1=r(i1+1,i2+1,i3)
r4_p1=r(i1+1,i2,i3+1)
r5_p1=r(i1+1,i2,i3-1)
u(i1,i2,i3) = u(i1,i2,i3)
& + c_0 * r1
& + c_1 * ( r1_m1 + r1_p1 + r2 + r3 + r5 + r4)
& + c_2 * ( r(i1,i2-1,i3-1) + r(i1,i2+1,i3-1)
& + r(i1,i2-1,i3+1) + r(i1,i2+1,i3+1)
& + r2_m1+r3_m1+r5_m1+r4_m1+r2_p1+r3_p1+r5_p1+r4_p1)
r1_m1 = r1
r1 = r1_p1
r2_m1 = r2
r2 = r2_p1
r3_m1 = r3
r3 = r3_p1
r4_m1 = r4
r4 = r4_p1
r5_m1 = r5
r5 = r5_p1
enddo
enddo
enddo
!DVM$ END REGION
else
!DVM$ REGION
!DVM$ PARALLEL (i3,i1) ON u(i1,*,i3),cuda_block(32,6)
!DVM$& ,private(i2,r1,r1_m1,r1_p1, r2,r2_m1,r2_p1, r3,r3_m1,r3_p1,
!DVM$& r4,r4_m1,r4_p1, r5,r5_m1,r5_p1), SHADOW_RENEW(r(CORNER))
do i3=2,n3-1
do i1=2,n1-1
r1_m1 = r(i1,1,i3)
r1 = r(i1,2,i3)
r2_m1 = r(i1-1,1,i3)
r2 = r(i1-1,2,i3)
r3_m1 = r(i1+1,1,i3)
r3 = r(i1+1,2,i3)
r4_m1 = r(i1,1,i3+1)
r4 = r(i1,2,i3+1)
r5_m1 = r(i1,1,i3-1)
r5 = r(i1,2,i3-1)
do i2=2,n2-1
r1_p1 = r(i1,i2+1,i3)
r2_p1 = r(i1-1,i2+1,i3)
r3_p1 = r(i1+1,i2+1,i3)
r4_p1 = r(i1,i2+1,i3+1)
r5_p1 = r(i1,i2+1,i3-1)
u(i1,i2,i3) = u(i1,i2,i3)
& + c_0 * r1
& + c_1 * ( r2 + r3
& + r1_m1 + r1_p1
& + r4 + r5
& )
& + c_2 * (
& r4_m1 + r4_p1
& + r5_m1 + r5_p1
& + r2_m1 + r2_p1
& + r(i1-1,i2,i3-1) + r(i1-1,i2,i3+1)
& + r3_m1 + r3_p1
& + r(i1+1,i2,i3-1) + r(i1+1,i2,i3+1)
& )
r1_m1 = r1
r1 = r1_p1
r2_m1 = r2
r2 = r2_p1
r3_m1 = r3
r3 = r3_p1
r4_m1 = r4
r4 = r4_p1
r5_m1 = r5
r5 = r5_p1
enddo
enddo
enddo
!DVM$ END REGION
endif
!DVM$ end interval
c---------------------------------------------------------------------
c exchange boundary points
c---------------------------------------------------------------------
call comm3(u,n1,n2,n3,k)
return
end

View File

@@ -0,0 +1,196 @@
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c r(h) = v - A u(h)
c <20><><EFBFBD> A - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>-<2D><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>,
c v - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> (<28><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> V-<2D><><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>)
c h - <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c @param double precission :: r(n1,n2,n3) ? r(h) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c @param double precission :: v(n1,n2,n3) ? v - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c @param double precission :: a(3) ? A - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c @param integer :: k - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
subroutine resid( u,v,r,n1,n2,n3,a,k )
c---------------------------------------------------------------------
!DVM$ INHERIT r, u, a, v
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c resid computes the residual: r = v - Au
c
c This implementation costs 15A + 4M per result, where
c A and M denote the costs of Addition (or Subtraction) and
c Multiplication, respectively.
c Presuming coefficient a(1) is zero (the NPB assumes this,
c but it is thus not a general case), 3A + 1M may be eliminated,
c resulting in 12A + 3M.
c Note that this vectorizes, and is also fine for cache
c based machines.
c---------------------------------------------------------------------
implicit none
include 'globals.h'
integer n1,n2,n3,k
double precision u(n1,n2,n3),v(n1,n2,n3),r(n1,n2,n3),a(0:3)
integer i3, i2, i1
double precision u1_m1,u1,u1_p1, u2_m1,u2,u2_p1, u3_m1,u3,u3_p1
double precision u4_m1,u4,u4_p1, u5_m1,u5,u5_p1, u6_m1,u6,u6_p1
double precision u7_m1,u7,u7_p1, u8_m1,u8,u8_p1
!DVM$ interval 3
if ( ver ) then
!DVM$ REGION
!DVM$ PARALLEL (i3,i2) ON r(*,i2,i3), cuda_block(32,6)
!DVM$& ,private(i1, u1_m1,u1,u1_p1, u2_m1,u2,u2_p1, u3_m1,u3,u3_p1,
!DVM$& u4_m1,u4,u4_p1, u5_m1,u5,u5_p1, u6_m1,u6,u6_p1,
!DVM$& u7_m1,u7,u7_p1, u8_m1,u8,u8_p1),SHADOW_RENEW(u(CORNER))
do i3=2,n3-1
do i2=2,n2-1
u1_m1 = u(1,i2-1,i3-1)
u1 = u(2,i2-1,i3-1)
u2_m1 = u(1,i2+1,i3+1)
u2 = u(2,i2+1,i3+1)
u3_m1 = u(1,i2-1,i3+1)
u3 = u(2,i2-1,i3+1)
u4_m1 = u(1,i2+1,i3-1)
u4 = u(2,i2+1,i3-1)
u5_m1 = u(1,i2+1,i3)
u5 = u(2,i2+1,i3)
u6_m1 = u(1,i2-1,i3)
u6 = u(2,i2-1,i3)
u7_m1 = u(1,i2,i3-1)
u7 = u(2,i2,i3-1)
u8_m1 = u(1,i2,i3+1)
u8 = u(2,i2,i3+1)
do i1=2,n1-1
u1_p1 = u(i1+1,i2-1,i3-1)
u2_p1 = u(i1+1,i2+1,i3+1)
u3_p1 = u(i1+1,i2-1,i3+1)
u4_p1 = u(i1+1,i2+1,i3-1)
u5_p1 = u(i1+1,i2+1,i3)
u6_p1 = u(i1+1,i2-1,i3)
u7_p1 = u(i1+1,i2,i3-1)
u8_p1 = u(i1+1,i2,i3+1)
r(i1,i2,i3) = v(i1,i2,i3)
& + 8.0D0/3.0D0 * u(i1,i2,i3) - 1.0D0/6.0D0 *
& (u1+u4+u3+u2+u6_m1+u5_m1+u7_m1+u8_m1+u6_p1+u5_p1+u7_p1+u8_p1)
& -1.0D0/12.0D0*(u1_m1+u4_m1+u3_m1+u2_m1+u1_p1+u4_p1+u3_p1+u2_p1)
u1_m1 = u1
u1 = u1_p1
u2_m1 = u2
u2 = u2_p1
u3_m1 = u3
u3 = u3_p1
u4_m1 = u4
u4 = u4_p1
u5_m1 = u5
u5 = u5_p1
u6_m1 = u6
u6 = u6_p1
u7_m1 = u7
u7 = u7_p1
u8_m1 = u8
u8 = u8_p1
enddo
enddo
enddo
!DVM$ END REGION
else
!DVM$ REGION
!DVM$ PARALLEL (i3,i1) ON r(i1,*,i3), cuda_block(32,6)
!DVM$& ,private(i2, u1_m1,u1,u1_p1, u2_m1,u2,u2_p1, u3_m1,u3,u3_p1,
!DVM$& u4_m1,u4,u4_p1, u5_m1,u5,u5_p1, u6_m1,u6,u6_p1,
!DVM$& u7_m1,u7,u7_p1, u8_m1,u8,u8_p1),SHADOW_RENEW(u(CORNER))
do i3=2,n3-1
do i1=2,n1-1
u1_m1 = u(i1,1,i3-1)
u1 = u(i1,2,i3-1)
u2_m1 = u(i1,1,i3+1)
u2 = u(i1,2,i3+1)
u3_m1 = u(i1-1,1,i3)
u3 = u(i1-1,2,i3)
u4_m1 = u(i1-1,1,i3-1)
u4 = u(i1-1,2,i3-1)
u5_m1 = u(i1-1,1,i3+1)
u5 = u(i1-1,2,i3+1)
u6_m1 = u(i1+1,1,i3)
u6 = u(i1+1,2,i3)
u7_m1 = u(i1+1,1,i3-1)
u7 = u(i1+1,2,i3-1)
u8_m1 = u(i1+1,1,i3+1)
u8 = u(i1+1,2,i3+1)
do i2=2,n2-1
u1_p1 = u(i1,i2+1,i3-1)
u2_p1 = u(i1,i2+1,i3+1)
u3_p1 = u(i1-1,i2+1,i3)
u4_p1 = u(i1-1,i2+1,i3-1)
u5_p1 = u(i1-1,i2+1,i3+1)
u6_p1 = u(i1+1,i2+1,i3)
u7_p1 = u(i1+1,i2+1,i3-1)
u8_p1 = u(i1+1,i2+1,i3+1)
r(i1,i2,i3) = v(i1,i2,i3)
& + 8.0D0/3.0D0 * u(i1,i2,i3)
& - 1.0D0/6.0D0 * (
& u1_m1 + u1_p1
& + u2_m1 + u2_p1
& + u3_m1 + u3_p1
& + u4 + u5
& + u6_m1 + u6_p1
& + u7 + u8
& )
& - 1.0D0/12.0D0 * (
& u4_m1 + u4_p1
& + u5_m1 + u5_p1
& + u7_m1 + u7_p1
& + u8_m1 + u8_p1
& )
u1_m1 = u1
u1 = u1_p1
u2_m1 = u2
u2 = u2_p1
u3_m1 = u3
u3 = u3_p1
u4_m1 = u4
u4 = u4_p1
u5_m1 = u5
u5 = u5_p1
u6_m1 = u6
u6 = u6_p1
u7_m1 = u7
u7 = u7_p1
u8_m1 = u8
u8 = u8_p1
enddo
enddo
enddo
!DVM$ END REGION
endif
!DVM$ end interval
c---------------------------------------------------------------------
c exchange boundary data
c---------------------------------------------------------------------
call comm3(r,n1,n2,n3,k)
return
end

View File

@@ -0,0 +1,169 @@
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> (<28><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>) <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c r(H) = P r(h)
c <20><><EFBFBD> H = 2h - <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>,
c P - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>,
c r - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>, <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c @param double precission :: r(m1k,m2k,m3k) ? r(h) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c @param double precission :: s(m1j,m2j,m3j) ? r(H) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c @param double precission :: d1, d2, d3 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c @param integer :: k - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
subroutine rprj3( r,m1k,m2k,m3k,s,m1j,m2j,m3j,k, d1,d2,d3)
c---------------------------------------------------------------------
!DVM$ INHERIT r,s
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c rprj3 projects onto the next coarser grid,
c using a trilinear Finite Element projection: s = r' = P r
c
c This implementation costs 20A + 4M per result, where
c A and M denote the costs of Addition and Multiplication.
c Note that this vectorizes, and is also fine for cache
c based machines.
c---------------------------------------------------------------------
implicit none
include 'globals.h'
integer m1k, m2k, m3k, m1j, m2j, m3j,k
double precision r(m1k,m2k,m3k), s(m1j,m2j,m3j)
integer j3, j2, j1, i3, i2, i1, d1, d2, d3, j
double precision r1_m1,r1_p1, r2_m1,r2_p1, r3_m1,r3_p1
double precision r4_m1,r4_p1, r5_m1,r5_p1, r6_m1,r6_p1
double precision r7_m1,r7_p1, r8_m1,r8_p1, r9_m1,r9_p1,y2,x2
!DVM$ interval 4
if( ver ) then
!DVM$ REGION
!DVM$ PARALLEL (j3,j2) ON s(*,j2,j3), SHADOW_RENEW(r(CORNER)),
!DVM$& cuda_block(32,6), PRIVATE(i1, i2, i3, j1,
!DVM$& r1_m1,r1_p1, r2_m1,r2_p1, r3_m1,r3_p1, r4_m1,r4_p1, r5_m1,r5_p1
!DVM$&,r6_m1,r6_p1, r7_m1,r7_p1, r8_m1,r8_p1, r9_m1,r9_p1,y2,x2)
do j3=2,m3j-1
do j2=2,m2j-1
i3 = 2*j3-d3
i2 = 2*j2-d2
r1_m1 = r(2*2-d1-1,i2+1,i3)
r2_m1 = r(2*2-d1-1,i2,i3+1)
r3_m1 = r(2*2-d1-1,i2+1,i3+1)
r4_m1 = r(2*2-d1-1,i2-1,i3)
r5_m1 = r(2*2-d1-1,i2,i3-1)
r6_m1 = r(2*2-d1-1,i2-1,i3-1)
r7_m1 = r(2*2-d1-1,i2,i3)
r8_m1 = r(2*2-d1-1,i2-1,i3+1)
r9_m1 = r(2*2-d1-1,i2+1,i3-1)
do j1=2,m1j-1
i1 = 2*j1-d1
r1_p1 = r(i1+1,i2+1,i3)
r2_p1 = r(i1+1,i2,i3+1)
r3_p1 = r(i1+1,i2+1,i3+1)
r4_p1 = r(i1+1,i2-1,i3)
r5_p1 = r(i1+1,i2,i3-1)
r6_p1 = r(i1+1,i2-1,i3-1)
r7_p1 = r(i1+1,i2,i3)
r8_p1 = r(i1+1,i2-1,i3+1)
r9_p1 = r(i1+1,i2+1,i3-1)
y2 = r(i1, i2-1,i3-1) + r(i1, i2-1,i3+1)
& + r(i1, i2+1,i3-1) + r(i1, i2+1,i3+1)
x2 = r(i1, i2-1,i3 ) + r(i1, i2+1,i3 )
& + r(i1, i2, i3-1) + r(i1, i2, i3+1)
s(j1,j2,j3) =
& 0.5D0 * r(i1,i2,i3)
& + 0.25D0 * ( r7_m1 + r7_p1 + x2)
& + 0.125D0 * ( r4_m1 + r1_m1 + r5_m1 + r2_m1 +
& r4_p1 + r1_p1 + r5_p1 + r2_p1 + y2)
& + 0.0625D0 * ( r6_m1 + r8_m1
& + r9_m1 + r3_m1 + r6_p1 + r8_p1 + r9_p1 + r3_p1)
r1_m1 = r1_p1
r2_m1 = r2_p1
r3_m1 = r3_p1
r4_m1 = r4_p1
r5_m1 = r5_p1
r6_m1 = r6_p1
r7_m1 = r7_p1
r8_m1 = r8_p1
r9_m1 = r9_p1
enddo
enddo
enddo
!DVM$ END REGION
else
!DVM$ REGION
!DVM$ PARALLEL (j3,j1) ON s(j1,*,j3), SHADOW_RENEW(r(CORNER)),
!DVM$& cuda_block(32,6), PRIVATE(i1, i2, i3, j2,
!DVM$& r1_m1,r1_p1, r2_m1,r2_p1, r3_m1,r3_p1, r4_m1,r4_p1, r5_m1,r5_p1
!DVM$&,r6_m1,r6_p1, r7_m1,r7_p1, r8_m1,r8_p1, r9_m1,r9_p1)
do j3=2,m3j-1
do j1=2,m1j-1
i3 = 2*j3-d3
i1 = 2*j1-d1
r1_m1 = r(i1,2*2-d2-1,i3)
r2_m1 = r(i1-1,2*2-d2-1,i3)
r3_m1 = r(i1+1,2*2-d2-1,i3)
r4_m1 = r(i1,2*2-d2-1,i3+1)
r5_m1 = r(i1,2*2-d2-1,i3-1)
r6_m1 = r(i1+1,2*2-d2-1,i3+1)
r7_m1 = r(i1+1,2*2-d2-1,i3-1)
r8_m1 = r(i1-1,2*2-d2-1,i3-1)
r9_m1 = r(i1-1,2*2-d2-1,i3+1)
do j2=2,m2j-1
i2 = 2*j2-d2
r1_p1 = r(i1,i2+1,i3)
r2_p1 = r(i1-1,i2+1,i3)
r3_p1 = r(i1+1,i2+1,i3)
r4_p1 = r(i1,i2+1,i3+1)
r5_p1 = r(i1,i2+1,i3-1)
r6_p1 = r(i1+1,i2+1,i3+1)
r7_p1 = r(i1+1,i2+1,i3-1)
r8_p1 = r(i1-1,i2+1,i3-1)
r9_p1 = r(i1-1,i2+1,i3+1)
s(j1,j2,j3) =
& 0.5D0 * r(i1,i2,i3)
& + 0.25D0 * ( r1_m1 + r1_p1 +
& r(i1-1,i2,i3) + r(i1+1,i2,i3)
& + r(i1,i2,i3-1) + r(i1,i2,i3+1))
& + 0.125D0 * (
& r2_m1 + r2_p1 + r3_m1 + r3_p1
& + r5_m1 + r4_m1
& + r5_p1 + r4_p1
& + r(i1-1,i2, i3-1) + r(i1-1,i2, i3+1)
& + r(i1+1,i2, i3-1) + r(i1+1,i2, i3+1))
& + 0.0625D0 * (
& r8_m1 + r9_m1
& + r8_p1 + r9_p1
& + r7_m1 + r6_m1
& + r7_p1 + r6_p1)
r1_m1 = r1_p1
r2_m1 = r2_p1
r3_m1 = r3_p1
r4_m1 = r4_p1
r5_m1 = r5_p1
r6_m1 = r6_p1
r7_m1 = r7_p1
r8_m1 = r8_p1
r9_m1 = r9_p1
enddo
enddo
enddo
!DVM$ END REGION
endif
!DVM$ end interval
j = k-1
call comm3(s,m1j,m2j,m3j,j)
return
end

View File

@@ -0,0 +1,226 @@
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
c @param integer d1, d2, d3 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c @param integer k - <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
subroutine dvmAlignDownCycle(d1, d2, d3, k)
implicit none
include 'globals.h'
include 'dvmvars.h'
integer k
integer d1, d2, d3
integer t1, t2, t3
!DVM$ TEMPLATE EXT_V_DOWN (m1(k)+1, m2(k)+1, m3(k)+1)
!DVM$ DISTRIBUTE EXT_V_DOWN (BLOCK, BLOCK, BLOCK)
t1 = d1 - 1; t2 = d2 - 1; t3 = d3 - 1;
!DVM$ REALIGN p_curr_r_j(i,j,k) WITH EXT_V_DOWN(2*i-1, 2*j-1, 2*k-1)
!DVM$ REALIGN p_curr_r_k(i,j,k) WITH EXT_V_DOWN(i+t1, j+t2, k+t3)
return
end
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> V-<2D><><EFBFBD><EFBFBD><EFBFBD>
c @param integer d1, d2, d3 - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c @param integer k - <20><><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
subroutine dvmAlignUpCycle(k)
implicit none
include 'globals.h'
include 'dvmvars.h'
integer k
!!DVM$ SHADOW p_curr_u_k(1:1,1:1,1:1)
!!DVM$ SHADOW p_curr_u_j(1:1,1:1,1:1)
!DVM$ TEMPLATE EXT_V_UP (m1(k)+1, m2(k)+1, m3(k)+1)
!DVM$ DISTRIBUTE EXT_V_UP (BLOCK, BLOCK, BLOCK)
!DVM$ REALIGN p_curr_u_j(i,j,k) WITH EXT_V_UP(2*i-1, 2*j-1, 2*k-1)
!DVM$ REALIGN p_curr_u_k(i,j,k) WITH EXT_V_UP(i, j, k)
!DVM$ REALIGN p_curr_r_k(i,j,k) WITH EXT_V_UP(i, j, k)
return
end
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> DVM
c @param integer isSecond - 1 <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
subroutine setupDVM(isSecond)
implicit none
include 'globals.h'
include 'dvmvars.h'
integer allocate, k, j, isSecond
integer pdim(3)
integer d1, d2, d3
!DVM$ TEMPLATE EXT_BOTTOM (m1(lb), m2(lb), m3(lb))
!DVM$ DISTRIBUTE EXT_BOTTOM (BLOCK, BLOCK, BLOCK)
!DVM$ TEMPLATE EXT_LAST (m1(lt)+1, m2(lt)+1, m3(lt)+1)
!DVM$ DISTRIBUTE EXT_LAST (BLOCK, BLOCK, BLOCK)
if (isSecond .eq. 0) then
! <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
do k = lt, 1, -1
pdim =(/ m1(k), m2(k), m3(k) /)
p_u_ir(k) = allocate(pdim, ir(k))
p_r_ir(k) = allocate(pdim, ir(k))
enddo
! <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20> <20><><EFBFBD><EFBFBD> <20><><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
pdim =(/ m1(lt), m2(lt), m3(lt) /)
pv = allocate(pdim, 1)
! <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
pu = p_u_ir(lt)
pr = p_r_ir(lt)
endif
! <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><> <20><><EFBFBD><EFBFBD><EFBFBD>
p_curr_r_j = pr
do k= lt, lb+1 , -1
j = k-1
p_curr_r_k = p_curr_r_j
p_curr_r_j = p_r_ir(j)
if(m1(k).eq.3)then; d1 = 2; else; d1 = 1; endif
if(m2(k).eq.3)then; d2 = 2; else; d2 = 1; endif
if(m3(k).eq.3)then; d3 = 2; else; d3 = 1; endif
call dvmAlignDownCycle(d1, d2, d3, k)
enddo
! <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD>
k = lb
p_curr_u_k = p_u_ir(k)
p_curr_r_k = p_r_ir(k)
!DVM$ REALIGN (i, j, k) WITH EXT_BOTTOM(i,j,k) :: p_curr_u_k
!DVM$ REALIGN (i, j, k) WITH EXT_BOTTOM(i,j,k) :: p_curr_r_k
! <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
do k = lb+1, lt-1
j = k-1
p_curr_u_j = p_curr_u_k
p_curr_u_k = p_u_ir(k)
p_curr_r_k = p_r_ir(k)
call dvmAlignUpCycle(k)
enddo
! <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
j = lt - 1
k = lt
p_curr_u_j = p_u_ir(j)
!DVM$ REALIGN p_curr_u_j(i,j,k) WITH EXT_LAST(2*i-1, 2*j-1, 2*k-1)
!DVM$ REALIGN pu(i,j,k) WITH EXT_LAST(i, j, k)
!DVM$ REALIGN pr(i,j,k) WITH EXT_LAST(i, j, k)
!DVM$ REALIGN pv(i,j,k) WITH EXT_LAST(i, j, k)
end subroutine setupDVM
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD> DVM
c---------------------------------------------------------------------
function allocate(dims, disp)
integer allocate
allocate = disp
return
end function allocate
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
subroutine setup(n1,n2,n3,k)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
include 'globals.h'
integer is1, is2, is3, ie1, ie2, ie3
common /grid/ is1,is2,is3,ie1,ie2,ie3
integer n1,n2,n3,k
integer j
integer ax, mi(3,maxlevel)
integer ng(3,maxlevel)
ng(1,lt) = nx(lt)
ng(2,lt) = ny(lt)
ng(3,lt) = nz(lt)
do ax=1,3
do k=lt-1,1,-1
ng(ax,k) = ng(ax,k+1)/2
enddo
enddo
61 format(10i4)
do k=lt,1,-1
nx(k) = ng(1,k)
ny(k) = ng(2,k)
nz(k) = ng(3,k)
enddo
do k = lt,1,-1
do ax = 1,3
mi(ax,k) = 2 + ng(ax,k)
enddo
m1(k) = mi(1,k)
m2(k) = mi(2,k)
m3(k) = mi(3,k)
enddo
k = lt
is1 = 2 + ng(1,k) - ng(1,lt)
ie1 = 1 + ng(1,k)
n1 = 3 + ie1 - is1
is2 = 2 + ng(2,k) - ng(2,lt)
ie2 = 1 + ng(2,k)
n2 = 3 + ie2 - is2
is3 = 2 + ng(3,k) - ng(3,lt)
ie3 = 1 + ng(3,k)
n3 = 3 + ie3 - is3
ir(lt)=1
do j = lt-1, 1, -1
ir(j)=ir(j+1)+one*m1(j+1)*m2(j+1)*m3(j+1)
enddo
if( debug_vec(1) .ge. 1 )then
write(*,*)' in setup, '
write(*,*)' k lt nx ny nz ',
& ' n1 n2 n3 is1 is2 is3 ie1 ie2 ie3'
write(*,9) k,lt,ng(1,k),ng(2,k),ng(3,k),
& n1,n2,n3,is1,is2,is3,ie1,ie2,ie3
9 format(15i4)
endif
k = lt
return
end

View File

@@ -0,0 +1,415 @@
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c @param double precission :: z(n1 ,n2 ,n3) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
subroutine zero3(z,n1,n2,n3)
c---------------------------------------------------------------------
!DVM$ INHERIT z
c---------------------------------------------------------------------
implicit none
integer n1, n2, n3
double precision z(n1,n2,n3)
integer i1, i2, i3
!!DVM$ INTERVAL 3
!DVM$ REGION
!DVM$ PARALLEL (i3,i2,i1) ON z(i1,i2,i3), PRIVATE(i3,i2,i1)
!DVM$&, cuda_block(32,6)
do i3=1,n3
do i2=1,n2
do i1=1,n1
z(i1,i2,i3)=0.0D0
enddo
enddo
enddo
!DVM$ END REGION
!!DVM$ END INTERVAL
return
end
c----- end of program ------------------------------------------------
c -- DEBUG --------------------------------------------------------
subroutine printMatrix(a,n1,n2,n3)
c---------------------------------------------------------------------
!DVM$ INHERIT a
integer n1,n2,n3,i1,i2,i3
double precision a(n1,n2,n3), z(n2)
integer m1, m2, m3
write(*,*) 'MATRIX ------------------'
write(*,*) a
write(*,*) ' '
! m1 = min(n1,18)
! m2 = min(n2,14)
! m3 = min(n3,18)
! write(*,*)' '
! do i3=1,m3
! do i1=1,m1
! do i2=1,m2
! z(i2) = a(i1,i2,i3)
! enddo
! write(*,6)(z(i2),i2=1,m2)
! enddo
! write(*,*)' - - - - - - - '
! enddo
! write(*,*)' '
! 6 format(15f6.3)
return
end
subroutine printMatrixNN(a,n1,n2,n3)
c---------------------------------------------------------------------
!DVM$ INHERIT a
integer n1, n2, n3
double precision a(n1,n2,n3), z
do i3=1,n3
do i2=1,n2
do i1=1,n1
z = a(i1,i2,i3)
if (z.ne.0) then
write(*,*) '(',i1,',',i2,',',i3,')=',z
endif
enddo
enddo
enddo
end
c -- EXTERNAL --------------------------------------------------------
subroutine timer_clear(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
elapsed(n) = 0.0
return
end
subroutine timer_start(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
external elapsed_time
double precision elapsed_time
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
start(n) = elapsed_time()
return
end
subroutine timer_stop(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
external elapsed_time
double precision elapsed_time
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
double precision t, now
now = elapsed_time()
t = now - start(n)
elapsed(n) = elapsed(n) + t
return
end
double precision function timer_read(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
timer_read = elapsed(n)
return
end
double precision function elapsed_time()
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
double precision t
double precision dvtime
data t/0.d0/
c This function must measure wall clock time, not CPU time.
c Since there is no portable timer in Fortran (77)
c we call a routine compiled in C (though the C source may have
c to be tweaked).
t = dvtime()
c The following is not ok for "official" results because it reports
c CPU time not wall clock time. It may be useful for developing/testing
c on timeshared Crays, though.
c call second(t)
elapsed_time = t
return
end
subroutine print_results(name, class, n1, n2, n3, niter,
> t, mops, optype, verified, npbversion)
c ,compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7)
implicit none
character*2 name
character*1 class
integer n1, n2, n3, niter, j
double precision t, mops
character optype*24, size*13
logical verified
character*5 npbversion
c > , compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7
write (*, 2) name
2 format(//, ' ', A2, ' Benchmark Completed.')
write (*, 3) Class
3 format(' Class = ', 12x, a12)
c If this is not a grid-based problem (EP, FT, CG), then
c we only print n1, which contains some measure of the
c problem size. In that case, n2 and n3 are both zero.
c Otherwise, we print the grid size n1xn2xn3
if ((n2 .eq. 0) .and. (n3 .eq. 0)) then
if (name(1:2) .eq. 'EP') then
write(size, '(f12.0)' ) 2.d0**n1
do j =13,1,-1
if (size(j:j) .eq. '.') size(j:j) = ' '
end do
write (*,42) size
42 format(' Size = ',12x, a14)
else
write (*,44) n1
44 format(' Size = ',12x, i12)
endif
else
write (*, 4) n1,n2,n3
4 format(' Size = ',12x, i3,'x',i3,'x',i3)
endif
write (*, 5) niter
5 format(' Iterations = ', 12x, i12)
write (*, 6) t
6 format(' Time in seconds = ',12x, f12.2)
write (*,9) mops
9 format(' Mop/s total = ',12x, f12.2)
write(*, 11) optype
11 format(' Operation type = ', a24)
if (verified) then
write(*,12) ' SUCCESSFUL'
else
write(*,12) 'UNSUCCESSFUL'
endif
12 format(' Verification = ', 12x, a)
write(*,13) npbversion
13 format(' Version = ', 12x, a12)
c write(*,14) compiletime
c 14 format(' Compile date = ', 12x, a12)
c write (*,121) cs1
c 121 format(/, ' Compile options:', /,
c > ' F77 = ', A)
c write (*,122) cs2
c 122 format(' FLINK = ', A)
c write (*,123) cs3
c 123 format(' F_LIB = ', A)
c
c write (*,124) cs4
c 124 format(' F_INC = ', A)
c
c write (*,125) cs5
c 125 format(' FFLAGS = ', A)
c
c write (*,126) cs6
c 126 format(' FLINKFLAGS = ', A)
c
c write(*, 127) cs7
c 127 format(' RAND = ', A)
write (*,130)
130 format(//' Please send the results of this run to:'//
> ' NPB Development Team '/
> ' Internet: npb@nas.nasa.gov'/
> ' '/
> ' If email is not available, send this to:'//
> ' MS T27A-1'/
> ' NASA Ames Research Center'/
> ' Moffett Field, CA 94035-1000'//
> ' Fax: 415-604-3957'//)
return
end
double precision function randlc (x, a)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c
c This routine returns a uniform pseudorandom double precision number in the
c range (0, 1) by using the linear congruential generator
c
c x_{k+1} = a x_k (mod 2^46)
c
c where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers
c before repeating. The argument A is the same as 'a' in the above formula,
c and X is the same as x_0. A and X must be odd double precision integers
c in the range (1, 2^46). The returned value RANDLC is normalized to be
c between 0 and 1, i.e. RANDLC = 2^(-46) * x_1. X is updated to contain
c the new seed x_1, so that subsequent calls to RANDLC using the same
c arguments will generate a continuous sequence.
c
c This routine should produce the same results on any computer with at least
c 48 mantissa bits in double precision floating point data. On 64 bit
c systems, double precision should be disabled.
c
c David H. Bailey October 26, 1990
c
c---------------------------------------------------------------------
implicit none
double precision r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
> t46 = t23 ** 2)
c---------------------------------------------------------------------
c Break A into two parts such that A = 2^23 * A1 + A2.
c---------------------------------------------------------------------
t1 = r23 * a
a1 = int (t1)
a2 = a - t23 * a1
c---------------------------------------------------------------------
c Break X into two parts such that X = 2^23 * X1 + X2, compute
c Z = A1 * X2 + A2 * X1 (mod 2^23), and then
c X = 2^23 * Z + A2 * X2 (mod 2^46).
c---------------------------------------------------------------------
t1 = r23 * x
x1 = int (t1)
x2 = x - t23 * x1
t1 = a1 * x2 + a2 * x1
t2 = int (r23 * t1)
z = t1 - t23 * t2
t3 = t23 * z + a2 * x2
t4 = int (r46 * t3)
x = t3 - t46 * t4
randlc = r46 * x
return
end
subroutine vranlc (n, x, a, y)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c
c This routine generates N uniform pseudorandom double precision numbers in
c the range (0, 1) by using the linear congruential generator
c
c x_{k+1} = a x_k (mod 2^46)
c
c where 0 < x_k < 2^46 and 0 < a < 2^46. This scheme generates 2^44 numbers
c before repeating. The argument A is the same as 'a' in the above formula,
c and X is the same as x_0. A and X must be odd double precision integers
c in the range (1, 2^46). The N results are placed in Y and are normalized
c to be between 0 and 1. X is updated to contain the new seed, so that
c subsequent calls to VRANLC using the same arguments will generate a
c continuous sequence. If N is zero, only initialization is performed, and
c the variables X, A and Y are ignored.
c
c This routine is the standard version designed for scalar or RISC systems.
c However, it should produce the same results on any single processor
c computer with at least 48 mantissa bits in double precision floating point
c data. On 64 bit systems, double precision should be disabled.
c
c---------------------------------------------------------------------
implicit none
integer i,n
double precision y,r23,r46,t23,t46,a,x,t1,t2,t3,t4,a1,a2,x1,x2,z
dimension y(*)
parameter (r23 = 0.5d0 ** 23, r46 = r23 ** 2, t23 = 2.d0 ** 23,
> t46 = t23 ** 2)
c---------------------------------------------------------------------
c Break A into two parts such that A = 2^23 * A1 + A2.
c---------------------------------------------------------------------
t1 = r23 * a
a1 = int (t1)
a2 = a - t23 * a1
c---------------------------------------------------------------------
c Generate N results. This loop is not vectorizable.
c---------------------------------------------------------------------
do i = 1, n
c---------------------------------------------------------------------
c Break X into two parts such that X = 2^23 * X1 + X2, compute
c Z = A1 * X2 + A2 * X1 (mod 2^23), and then
c X = 2^23 * Z + A2 * X2 (mod 2^46).
c---------------------------------------------------------------------
t1 = r23 * x
x1 = int (t1)
x2 = x - t23 * x1
t1 = a1 * x2 + a2 * x1
t2 = int (r23 * t1)
z = t1 - t23 * t2
t3 = t23 * z + a2 * x2
t4 = int (r46 * t3)
x = t3 - t46 * t4
y(i) = r46 * x
enddo
return
end
c---------------------------------------------------------------------

View File

@@ -0,0 +1,431 @@
c---------------------------------------------------------------------
c <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD>-<2D><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
c @param double precission :: z(n1 ,n2 ,n3) - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c @param integer :: k - <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD> <20><><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD><EFBFBD>
c---------------------------------------------------------------------
subroutine zran3(z,n1,n2,n3,nx,ny,k,class)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c zran3 loads +1 at ten randomly chosen points,
c loads -1 at a different ten random points,
c and zero elsewhere.
c---------------------------------------------------------------------
implicit none
include 'npbparams.h'
!DVM$ INHERIT z
!DVM$ DYNAMIC z
integer is1, is2, is3, ie1, ie2, ie3,i3b,i3e,i2b,i1b
common /grid/ is1,is2,is3,ie1,ie2,ie3
integer n1, n2, n3, k, nx, ny, ierr, i0, m0, m1
double precision z(n1,n2,n3),zz(2+2**ndim1)
character*1 class
integer mm, i1, i2, i3, d1, e1, e2, e3,ii2,ii1
double precision x, a, max_val, min_val
double precision xx, x0, x1, a1, a2, ai
parameter( mm = 10, a = 5.D0 ** 13, x = 314159265.D0)
double precision ten( mm, 0:1 ), temp, best
integer i, j1( mm, 0:1 ), j2( mm, 0:1 ), j3( mm, 0:1 )
integer jg( 0:3, mm, 0:1 ), jg_temp(4)
integer id1,id2,id3,idx1,idx2,idx3,nj,x22
call zero3(z,n1,n2,n3)
if(Class .eq. 'S') then
j1( 10,1)= 9
j1( 10,0)= 2
j2( 10,1)= 3
j2( 10,0)= 13
j3( 10,1)= 22
j3( 10,0)= 4
ten( 10,1)= 0.9999958165E+00
ten( 10,0)= 0.1621806298E-04
j1( 9,1)= 21
j1( 9,0)= 15
j2( 9,1)= 31
j2( 9,0)= 10
j3( 9,1)= 33
j3( 9,0)= 19
ten( 9,1)= 0.9999389618E+00
ten( 9,0)= 0.7495597642E-04
j1( 8,1)= 4
j1( 8,0)= 7
j2( 8,1)= 2
j2( 8,0)= 16
j3( 8,1)= 5
j3( 8,0)= 2
ten( 8,1)= 0.9999174510E+00
ten( 8,0)= 0.1889568795E-03
j1( 7,1)= 6
j1( 7,0)= 6
j2( 7,1)= 24
j2( 7,0)= 30
j3( 7,1)= 5
j3( 7,0)= 17
ten( 7,1)= 0.9998666211E+00
ten( 7,0)= 0.1958622020E-03
j1( 6,1)= 3
j1( 6,0)= 14
j2( 6,1)= 18
j2( 6,0)= 4
j3( 6,1)= 23
j3( 6,0)= 3
ten( 6,1)= 0.9998273669E+00
ten( 6,0)= 0.2522906835E-03
j1( 5,1)= 23
j1( 5,0)= 7
j2( 5,1)= 33
j2( 5,0)= 19
j3( 5,1)= 8
j3( 5,0)= 10
ten( 5,1)= 0.9997817402E+00
ten( 5,0)= 0.2966875037E-03
j1( 4,1)= 14
j1( 4,0)= 22
j2( 4,1)= 17
j2( 4,0)= 21
j3( 4,1)= 14
j3( 4,0)= 13
ten( 4,1)= 0.9997789044E+00
ten( 4,0)= 0.3082809722E-03
j1( 3,1)= 32
j1( 3,0)= 28
j2( 3,1)= 6
j2( 3,0)= 17
j3( 3,1)= 27
j3( 3,0)= 33
ten( 3,1)= 0.9997405518E+00
ten( 3,0)= 0.3944731504E-03
j1( 2,1)= 30
j1( 2,0)= 10
j2( 2,1)= 2
j2( 2,0)= 27
j3( 2,1)= 30
j3( 2,0)= 24
ten( 2,1)= 0.9997394292E+00
ten( 2,0)= 0.4423527428E-03
j1( 1,1)= 19
j1( 1,0)= 9
j2( 1,1)= 28
j2( 1,0)= 16
j3( 1,1)= 19
j3( 1,0)= 28
ten( 1,1)= 0.9996874580E+00
ten( 1,0)= 0.4726676489E-03
else if(class .eq. 'W') then
j1( 10,1)= 115
j1( 10,0)= 17
j2( 10,1)= 87
j2( 10,0)= 24
j3( 10,1)= 52
j3( 10,0)= 125
ten( 10,1)= 0.9999999670E+00
ten( 10,0)= 0.4097578454E-06
j1( 9,1)= 129
j1( 9,0)= 104
j2( 9,1)= 47
j2( 9,0)= 119
j3( 9,1)= 34
j3( 9,0)= 61
ten( 9,1)= 0.9999996061E+00
ten( 9,0)= 0.9408003763E-06
j1( 8,1)= 16
j1( 8,0)= 16
j2( 8,1)= 112
j2( 8,0)= 123
j3( 8,1)= 120
j3( 8,0)= 77
ten( 8,1)= 0.9999987379E+00
ten( 8,0)= 0.1220169409E-05
j1( 7,1)= 36
j1( 7,0)= 111
j2( 7,1)= 23
j2( 7,0)= 89
j3( 7,1)= 102
j3( 7,0)= 123
ten( 7,1)= 0.9999973226E+00
ten( 7,0)= 0.1432884929E-05
j1( 6,1)= 31
j1( 6,0)= 11
j2( 6,1)= 19
j2( 6,0)= 3
j3( 6,1)= 111
j3( 6,0)= 23
ten( 6,1)= 0.9999970764E+00
ten( 6,0)= 0.1917141063E-05
j1( 5,1)= 29
j1( 5,0)= 97
j2( 5,1)= 50
j2( 5,0)= 36
j3( 5,1)= 13
j3( 5,0)= 56
ten( 5,1)= 0.9999968171E+00
ten( 5,0)= 0.2780729588E-05
j1( 4,1)= 82
j1( 4,0)= 40
j2( 4,1)= 92
j2( 4,0)= 128
j3( 4,1)= 22
j3( 4,0)= 14
ten( 4,1)= 0.9999964096E+00
ten( 4,0)= 0.3077687282E-05
j1( 3,1)= 28
j1( 3,0)= 94
j2( 3,1)= 86
j2( 3,0)= 85
j3( 3,1)= 75
j3( 3,0)= 37
ten( 3,1)= 0.9999960890E+00
ten( 3,0)= 0.3419091698E-05
j1( 2,1)= 41
j1( 2,0)= 72
j2( 2,1)= 34
j2( 2,0)= 4
j3( 2,1)= 3
j3( 2,0)= 66
ten( 2,1)= 0.9999958165E+00
ten( 2,0)= 0.3899679498E-05
j1( 1,1)= 117
j1( 1,0)= 116
j2( 1,1)= 88
j2( 1,0)= 105
j3( 1,1)= 22
j3( 1,0)= 7
ten( 1,1)= 0.9999953932E+00
ten( 1,0)= 0.4564590384E-05
else if(class .eq. 'A' .or. class .eq. 'B') then
j1( 10,1)= 54
j1( 10,0)= 223
j2( 10,1)= 209
j2( 10,0)= 42
j3( 10,1)= 40
j3( 10,0)= 240
ten( 10,1)= 0.9999999811E+00
ten( 10,0)= 0.1058528198E-07
j1( 9,1)= 243
j1( 9,0)= 154
j2( 9,1)= 172
j2( 9,0)= 162
j3( 9,1)= 14
j3( 9,0)= 36
ten( 9,1)= 0.9999999670E+00
ten( 9,0)= 0.6491002580E-07
j1( 8,1)= 203
j1( 8,0)= 82
j2( 8,1)= 18
j2( 8,0)= 184
j3( 8,1)= 198
j3( 8,0)= 255
ten( 8,1)= 0.9999999092E+00
ten( 8,0)= 0.1261776816E-06
j1( 7,1)= 202
j1( 7,0)= 250
j2( 7,1)= 83
j2( 7,0)= 170
j3( 7,1)= 209
j3( 7,0)= 157
ten( 7,1)= 0.9999999006E+00
ten( 7,0)= 0.2087648028E-06
j1( 6,1)= 115
j1( 6,0)= 199
j2( 6,1)= 123
j2( 6,0)= 7
j3( 6,1)= 207
j3( 6,0)= 203
ten( 6,1)= 0.9999998605E+00
ten( 6,0)= 0.3218575699E-06
j1( 5,1)= 212
j1( 5,0)= 92
j2( 5,1)= 7
j2( 5,0)= 63
j3( 5,1)= 248
j3( 5,0)= 205
ten( 5,1)= 0.9999998070E+00
ten( 5,0)= 0.3231413785E-06
j1( 4,1)= 45
j1( 4,0)= 17
j2( 4,1)= 194
j2( 4,0)= 205
j3( 4,1)= 234
j3( 4,0)= 32
ten( 4,1)= 0.9999997641E+00
ten( 4,0)= 0.4097578454E-06
j1( 3,1)= 176
j1( 3,0)= 101
j2( 3,1)= 246
j2( 3,0)= 156
j3( 3,1)= 164
j3( 3,0)= 59
ten( 3,1)= 0.9999997464E+00
ten( 3,0)= 0.4272763050E-06
j1( 2,1)= 5
j1( 2,0)= 102
j2( 2,1)= 118
j2( 2,0)= 138
j3( 2,1)= 175
j3( 2,0)= 112
ten( 2,1)= 0.9999997340E+00
ten( 2,0)= 0.4331109977E-06
j1( 1,1)= 57
j1( 1,0)= 211
j2( 1,1)= 120
j2( 1,0)= 154
j3( 1,1)= 167
j3( 1,0)= 98
ten( 1,1)= 0.9999996868E+00
ten( 1,0)= 0.4353645551E-06
else if(class .eq. 'C') then
j1( 10,1)= 310
j1( 10,0)= 399
j2( 10,1)= 361
j2( 10,0)= 312
j3( 10,1)= 11
j3( 10,0)= 200
ten( 10,1)= 0.9999999811E+00
ten( 10,0)= 0.6358860105E-08
j1( 9,1)= 11
j1( 9,0)= 96
j2( 9,1)= 493
j2( 9,0)= 401
j3( 9,1)= 118
j3( 9,0)= 238
ten( 9,1)= 0.9999999808E+00
ten( 9,0)= 0.7946667324E-08
j1( 8,1)= 451
j1( 8,0)= 223
j2( 8,1)= 270
j2( 8,0)= 278
j3( 8,1)= 443
j3( 8,0)= 61
ten( 8,1)= 0.9999999778E+00
ten( 8,0)= 0.1058528198E-07
j1( 7,1)= 149
j1( 7,0)= 344
j2( 7,1)= 117
j2( 7,0)= 139
j3( 7,1)= 199
j3( 7,0)= 168
ten( 7,1)= 0.9999999700E+00
ten( 7,0)= 0.2456904724E-07
j1( 6,1)= 243
j1( 6,0)= 383
j2( 6,1)= 87
j2( 6,0)= 74
j3( 6,1)= 5
j3( 6,0)= 283
ten( 6,1)= 0.9999999670E+00
ten( 6,0)= 0.2954460854E-07
j1( 5,1)= 509
j1( 5,0)= 352
j2( 5,1)= 43
j2( 5,0)= 194
j3( 5,1)= 127
j3( 5,0)= 418
ten( 5,1)= 0.9999999666E+00
ten( 5,0)= 0.4643648310E-07
j1( 4,1)= 163
j1( 4,0)= 18
j2( 4,1)= 280
j2( 4,0)= 21
j3( 4,1)= 75
j3( 4,0)= 457
ten( 4,1)= 0.9999999358E+00
ten( 4,0)= 0.4987107616E-07
j1( 3,1)= 146
j1( 3,0)= 154
j2( 3,1)= 93
j2( 3,0)= 338
j3( 3,1)= 312
j3( 3,0)= 10
ten( 3,1)= 0.9999999149E+00
ten( 3,0)= 0.6491002580E-07
j1( 2,1)= 203
j1( 2,0)= 402
j2( 2,1)= 10
j2( 2,0)= 504
j3( 2,1)= 51
j3( 2,0)= 449
ten( 2,1)= 0.9999999092E+00
ten( 2,0)= 0.6990178747E-07
j1( 1,1)= 151
j1( 1,0)= 74
j2( 1,1)= 401
j2( 1,0)= 2
j3( 1,1)= 331
j3( 1,0)= 107
ten( 1,1)= 0.9999999069E+00
ten( 1,0)= 0.8774652827E-07
endif
i1 = mm
i0 = mm
do i=mm,1,-1
best = 0.d0
if(best .lt. ten( i1, 1 ))then
jg( 0, i, 1) = 0
jg( 1, i, 1) = is1 - 2 + j1( i1, 1 )
jg( 2, i, 1) = is2 - 2 + j2( i1, 1 )
jg( 3, i, 1) = is3 - 2 + j3( i1, 1 )
i1 = i1-1
else
jg( 0, i, 1) = 0
jg( 1, i, 1) = 0
jg( 2, i, 1) = 0
jg( 3, i, 1) = 0
endif
best = 1.d0
if(best .gt. ten( i0, 0 ))then
jg( 0, i, 0) = 0
jg( 1, i, 0) = is1 - 2 + j1( i0, 0 )
jg( 2, i, 0) = is2 - 2 + j2( i0, 0 )
jg( 3, i, 0) = is3 - 2 + j3( i0, 0 )
i0 = i0-1
else
jg( 0, i, 0) = 0
jg( 1, i, 0) = 0
jg( 2, i, 0) = 0
jg( 3, i, 0) = 0
endif
enddo
!DVM$ region
!DVM$ PARALLEL (i3,i2,i1) ON z(i1,i2,i3), private(i),cuda_block(32,6)
do i3=1,n3
do i2=1,n2
do i1=1,n1
z(i1,i2,i3) = 0.0D0
do i = mm,1,-1
if(i1 .eq. jg(1,i,0) .and. i2 .eq. jg(2,i,0)
& .and. i3 .eq. jg(3,i,0)) then
z(i1,i2,i3) = -1.0D0
endif
if(i1 .eq. jg(1,i,1) .and. i2 .eq. jg(2,i,1)
& .and. i3 .eq. jg(3,i,1)) then
z(i1,i2,i3) = 1.0D0
endif
enddo
enddo
enddo
enddo
!DVM$ end region
call comm3(z,n1,n2,n3,k)
c---------------------------------------------------------------------
c call showall(z,n1,n2,n3)
c---------------------------------------------------------------------
return
end

View File

@@ -0,0 +1,4 @@
integer dvm_debug
C dvm_debug = 0 - standard mode, dvm_debug > 0 - debugging mode
parameter (dvm_debug=0)

View File

@@ -0,0 +1,21 @@
c---------------------------------------------------------------------
c FDVM specifications
c---------------------------------------------------------------------
!! integer pv,pv1,pu1,pr1,pu(maxlevel),pr(maxlevel),pus,pus1
integer psize(3),pdim
common /pointers/ pv,pu1,pr1,pu,pr,pv1,pus,pus1
common/processors/ psize,pdim
DOUBLE PRECISION,POINTER,DIMENSION(:,:,:) ::
& pv,pu1,pr1,pv1,pus,pus1
!! DOUBLE PRECISION,POINTER,DIMENSION(:,:,:) ::
!! & pu(maxlevel),pr(maxlevel)
CDVM$ TEMPLATE tmp (1+2**(lt+1),1+2**(lt+1),1+2**(lt+1))
CDVM$ DISTRIBUTE tmp (*,*,BLOCK)
CDVM$ ALIGN :: pr1,pv,pu1,pus,pus1
CDVM$ DYNAMIC pv,tmp,pus,pus1,pu1,pr1
TYPE P
DOUBLE PRECISION,POINTER,DIMENSION(:,:,:) :: p
CDVM$ ALIGN :: p
CDVM$ DYNAMIC p
END TYPE
TYPE(P) pu(maxlevel),pr(maxlevel)

View File

@@ -0,0 +1,52 @@
c---------------------------------------------------------------------
c Parameter lm (declared and set in "npbparams.h") is the log-base2 of
c the edge size max for the partition on a given node, so must be changed
c either to save space (if running a small case) or made bigger for larger
c cases, for example, 512^3. Thus lm=7 means that the largest dimension
c of a partition that can be solved on a node is 2^7 = 128. lm is set
c automatically in npbparams.h
c Parameters ndim1, ndim2, ndim3 are the local problem dimensions.
c---------------------------------------------------------------------
include 'npbparams.h'
! nm - actual dimension including ghost cells for communications
! nv - size of rhs array
! nr - size of residual array
! nm2 - size of communication buffer
! maxlevel- maximum number of levels
integer nm
> , nv
> , nr
> , nm2
> , maxlevel
parameter( nm=2+2**lm, nv=(2+2**ndim1)*(2+2**ndim2)*(2+2**ndim3) )
parameter( nm2=2*nm*nm, maxlevel=11 )
parameter( nr = (8*(nv+nm**2+5*nm+7*lm))/7 )
c---------------------------------------------------------------------
integer nx(maxlevel),ny(maxlevel),nz(maxlevel)
common /mg3/ nx,ny,nz
character class
common /ClassType/class
integer debug_vec(0:7)
common /my_debug/ debug_vec
integer ir(maxlevel), m1(maxlevel), m2(maxlevel), m3(maxlevel)
integer lt, lb, mi(3,maxlevel),nreq,lbdvm
common /fap/ ir,m1,m2,m3,lt,lb,mi,nreq,lbdvm
logical proc1
parameter (proc1 = .TRUE.)
c---------------------------------------------------------------------
c Set at m=1024, can handle cases up to 1024^3 case
c---------------------------------------------------------------------
integer m
parameter( m=1037 )
double precision buff(nm2,4)
common /buffer/ buff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,44 @@
SHELL=/bin/sh
BENCHMARK=sp
BENCHMARKU=SP
include ../config/make.def
include ../sys/make.common
SOURCES = sp.for \
set_constants.for \
initialize.for \
exact_rhs.for \
compute_rhs.for \
verify.for \
compute_errors.for \
timers.for \
print_result.for
SOURCES_SINGLE = z_solve.for x_solve.for y_solve.for
SOURCES_MPI = x_solve_mpi.for y_solve_mpi.for z_solve_mpi.for
OBJS = ${SOURCES:.for=.o}
OBJS_SINGLE = ${SOURCES_SINGLE:.for=.o}
OBJS_MPI = ${SOURCES_MPI:.for=.o}
${PROGRAM}: config
@if [ $(VERSION) = MPI ] ; then \
${MAKE} MPI_VER; \
else \
${MAKE} SINGLE_VER; \
fi
MPI_VER: $(OBJS) $(OBJS_MPI)
${FLINK} -o ${PROGRAM} ${OBJS} $(OBJS_MPI)
SINGLE_VER: $(OBJS) $(OBJS_SINGLE)
${FLINK} -o ${PROGRAM} ${OBJS} $(OBJS_SINGLE)
%.o: %.for npbparams.h header.h
${F77} ${FFLAGS} -c -o $@ $<
clean:
rm -f npbparams.h
rm -f *.o *~
rm -f *.cu *.cuf *.c *.f

View File

@@ -0,0 +1,12 @@
@echo off
set CLASS=%1
set OPT=%2
CALL ..\sys\setparams SP %CLASS%
CALL %F77% %OPT% sp 1>out_%CLASS%.txt 2>err_%CLASS%.txt
if exist sp.exe (
copy sp.exe %BIN%\sp.%CLASS%.x.exe
del sp.exe
)

View File

@@ -0,0 +1,116 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine error_norm(rms)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c this function computes the norm of the difference between the
c computed solution and the exact solution
c---------------------------------------------------------------------
include 'header.h'
integer i, j, k, m, d
double precision xi, eta, zeta, u_exact(5), rms(5), add
double precision r1,r2,r3,r4,r5
do m = 1, 5
rms(m) = 0.0d0
enddo
r1 = 0.0d0
r2 = 0.0d0
r3 = 0.0d0
r4 = 0.0d0
r5 = 0.0d0
!DVM$ region
!DVM$ parallel (k,j,i) on u(*,i,j,k),private(zeta,eta,xi,add,u_exact,m)
!DVM$& ,reduction(SUM(r1),SUM(r2),SUM(r3),SUM(r4),SUM(r5))
! DVM$& ,shadow_renew(u, rhs)
do k = 0, problem_size-1
do j = 0, problem_size-1
do i = 0, problem_size-1
zeta = dble(k) * dnzm1
eta = dble(j) * dnym1
xi = dble(i) * dnxm1
do m = 1, 5
u_exact(m) = ce(m,1) +
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
add = u(1,i,j,k)-u_exact(1)
r1 = r1 + add*add
add = u(2,i,j,k)-u_exact(2)
r2 = r2 + add*add
add = u(3,i,j,k)-u_exact(3)
r3 = r3 + add*add
add = u(4,i,j,k)-u_exact(4)
r4 = r4 + add*add
add = u(5,i,j,k)-u_exact(5)
r5 = r5 + add*add
end do
end do
end do
!DVM$ end region
rms(1) = r1
rms(2) = r2
rms(3) = r3
rms(4) = r4
rms(5) = r5
do m = 1, 5
do d = 1, 3
rms(m) = rms(m) / dble(grid_points(d)-2)
end do
rms(m) = dsqrt(rms(m))
end do
return
end
subroutine rhs_norm(rms)
include 'header.h'
integer i, j, k, d, m
double precision rms(5), add
do m = 1, 5
rms(m) = 0.0d0
enddo
!DVM$ region
!DVM$ parallel (k,j,i) on u(*,i,j,k),private(add)
!DVM$& ,reduction(SUM(rms))
do k = 1, nz2
do j = 1, ny2
do i = 1, nx2
add = rhs(1,i,j,k)
rms(1) = rms(1) + add*add
add = rhs(2,i,j,k)
rms(2) = rms(2) + add*add
add = rhs(3,i,j,k)
rms(3) = rms(3) + add*add
add = rhs(4,i,j,k)
rms(4) = rms(4) + add*add
add = rhs(5,i,j,k)
rms(5) = rms(5) + add*add
end do
end do
end do
!DVM$ end region
do m = 1, 5
do d = 1, 3
rms(m) = rms(m) / dble(grid_points(d)-2)
end do
rms(m) = dsqrt(rms(m))
end do
return
end

View File

@@ -0,0 +1,339 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine compute_rhs(aditional_comp)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
include 'header.h'
integer i, j, k, m
double precision aux, rho_inv, uijk, up1, um1, vijk, vp1, vm1,
> wijk, wp1, wm1,rhs_(5)
double precision t1, t2, t3, ac, ru1, uu, vv, ww,ac2inv
integer aditional_comp
if (timeron) call timer_start(t_rhs)
!DVM$ region out(us,vs,ws,qs,rho_i,speed,square)
!DVM$ parallel (k,j,i) on u(*,i,j,k),private(rho_inv,aux,m)
!DVM$& ,shadow_renew(u(0:0,2:3,2:3,2:3)),SHADOW_COMPUTE
do k = 0, problem_size-1
do j = 0, problem_size-1
do i = 0, problem_size-1
rho_inv = 1.0d0/u(1,i,j,k)
rho_i(i,j,k) = rho_inv
us(i,j,k) = u(2,i,j,k) * rho_inv
vs(i,j,k) = u(3,i,j,k) * rho_inv
ws(i,j,k) = u(4,i,j,k) * rho_inv
square(i,j,k) = 0.5d0* (
> u(2,i,j,k)*u(2,i,j,k) +
> u(3,i,j,k)*u(3,i,j,k) +
> u(4,i,j,k)*u(4,i,j,k) ) * rho_inv
qs(i,j,k) = square(i,j,k) * rho_inv
c---------------------------------------------------------------------
c (don't need speed and ainx until the lhs computation)
c---------------------------------------------------------------------
aux = c1c2*rho_inv* (u(5,i,j,k) - square(i,j,k))
speed(i,j,k) = dsqrt(aux)
do m = 1, 5
rhs(m,i,j,k) = forcing(m,i,j,k)
end do
end do
end do
end do
!DVM$ parallel (k,j,i) on rhs(*,i,j,k),private(uijk,up1,um1,m
!DVM$& ,vijk,vp1,vm1,wijk,wp1,wm1,rhs_,
!DVM$& t1, t2, t3, ac, ru1, uu, vv, ww,ac2inv), CUDA_BLOCK(32,4)
do k = 1, nz2
do j = 1, ny2
do i = 1, nx2
uijk = us(i,j,k)
up1 = us(i+1,j,k)
um1 = us(i-1,j,k)
rhs_(1) = rhs(1,i,j,k)
rhs_(2) = rhs(2,i,j,k)
rhs_(3) = rhs(3,i,j,k)
rhs_(4) = rhs(4,i,j,k)
rhs_(5) = rhs(5,i,j,k)
rhs_(1) = rhs_(1) + dx1tx1 *
> (u(1,i+1,j,k) - 2.0d0*u(1,i,j,k) +
> u(1,i-1,j,k)) -
> tx2 * (u(2,i+1,j,k) - u(2,i-1,j,k))
rhs_(2) = rhs_(2) + dx2tx1 *
> (u(2,i+1,j,k) - 2.0d0*u(2,i,j,k) +
> u(2,i-1,j,k)) +
> xxcon2*con43 * (up1 - 2.0d0*uijk + um1) -
> tx2 * (u(2,i+1,j,k)*up1 -
> u(2,i-1,j,k)*um1 +
> (u(5,i+1,j,k)- square(i+1,j,k)-
> u(5,i-1,j,k)+ square(i-1,j,k))*
> c2)
rhs_(3) = rhs_(3) + dx3tx1 *
> (u(3,i+1,j,k) - 2.0d0*u(3,i,j,k) +
> u(3,i-1,j,k)) +
> xxcon2 * (vs(i+1,j,k) - 2.0d0*vs(i,j,k) +
> vs(i-1,j,k)) -
> tx2 * (u(3,i+1,j,k)*up1 -
> u(3,i-1,j,k)*um1)
rhs_(4) = rhs_(4) + dx4tx1 *
> (u(4,i+1,j,k) - 2.0d0*u(4,i,j,k) +
> u(4,i-1,j,k)) +
> xxcon2 * (ws(i+1,j,k) - 2.0d0*ws(i,j,k) +
> ws(i-1,j,k)) -
> tx2 * (u(4,i+1,j,k)*up1 -
> u(4,i-1,j,k)*um1)
rhs_(5) = rhs_(5) + dx5tx1 *
> (u(5,i+1,j,k) - 2.0d0*u(5,i,j,k) +
> u(5,i-1,j,k)) +
> xxcon3 * (qs(i+1,j,k) - 2.0d0*qs(i,j,k) +
> qs(i-1,j,k)) +
> xxcon4 * (up1*up1 - 2.0d0*uijk*uijk +
> um1*um1) +
> xxcon5 * (u(5,i+1,j,k)*rho_i(i+1,j,k) -
> 2.0d0*u(5,i,j,k)*rho_i(i,j,k) +
> u(5,i-1,j,k)*rho_i(i-1,j,k)) -
> tx2 * ( (c1*u(5,i+1,j,k) -
> c2*square(i+1,j,k))*up1 -
> (c1*u(5,i-1,j,k) -
> c2*square(i-1,j,k))*um1 )
if(i .eq. 1) then
do m = 1, 5
rhs_(m) = rhs_(m)- dssp *
> ( 5.0d0*u(m,i,j,k) - 4.0d0*u(m,i+1,j,k) +
> u(m,i+2,j,k))
end do
elseif(i .eq. 2) then
do m = 1, 5
rhs_(m) = rhs_(m) - dssp *
> (-4.0d0*u(m,i-1,j,k) + 6.0d0*u(m,i,j,k) -
> 4.0d0*u(m,i+1,j,k) + u(m,i+2,j,k))
end do
elseif(i .ge. 3 .and. i .le. nx2-2) then
do m = 1, 5
rhs_(m) = rhs_(m) - dssp *
> ( u(m,i-2,j,k) - 4.0d0*u(m,i-1,j,k) +
> 6.0*u(m,i,j,k) - 4.0d0*u(m,i+1,j,k) +
> u(m,i+2,j,k) )
end do
elseif(i .eq. nx2-1) then
do m = 1, 5
rhs_(m) = rhs_(m) - dssp *
> ( u(m,i-2,j,k) - 4.0d0*u(m,i-1,j,k) +
> 6.0d0*u(m,i,j,k) - 4.0d0*u(m,i+1,j,k) )
end do
elseif( i .eq. nx2) then
do m = 1, 5
rhs_(m) = rhs_(m) - dssp *
> ( u(m,i-2,j,k) - 4.d0*u(m,i-1,j,k) +
> 5.d0*u(m,i,j,k) )
end do
endif
vijk = vs(i,j,k)
vp1 = vs(i,j+1,k)
vm1 = vs(i,j-1,k)
rhs_(1) = rhs_(1) + dy1ty1 *
> (u(1,i,j+1,k) - 2.0d0*u(1,i,j,k) +
> u(1,i,j-1,k)) -
> ty2 * (u(3,i,j+1,k) - u(3,i,j-1,k))
rhs_(2) = rhs_(2) + dy2ty1 *
> (u(2,i,j+1,k) - 2.0d0*u(2,i,j,k) +
> u(2,i,j-1,k)) +
> yycon2 * (us(i,j+1,k) - 2.0d0*us(i,j,k) +
> us(i,j-1,k)) -
> ty2 * (u(2,i,j+1,k)*vp1 -
> u(2,i,j-1,k)*vm1)
rhs_(3) = rhs_(3) + dy3ty1 *
> (u(3,i,j+1,k) - 2.0d0*u(3,i,j,k) +
> u(3,i,j-1,k)) +
> yycon2*con43 * (vp1 - 2.0d0*vijk + vm1) -
> ty2 * (u(3,i,j+1,k)*vp1 -
> u(3,i,j-1,k)*vm1 +
> (u(5,i,j+1,k) - square(i,j+1,k) -
> u(5,i,j-1,k) + square(i,j-1,k))
> *c2)
rhs_(4) = rhs_(4) + dy4ty1 *
> (u(4,i,j+1,k) - 2.0d0*u(4,i,j,k) +
> u(4,i,j-1,k)) +
> yycon2 * (ws(i,j+1,k) - 2.0d0*ws(i,j,k) +
> ws(i,j-1,k)) -
> ty2 * (u(4,i,j+1,k)*vp1 -
> u(4,i,j-1,k)*vm1)
rhs_(5) = rhs_(5) + dy5ty1 *
> (u(5,i,j+1,k) - 2.0d0*u(5,i,j,k) +
> u(5,i,j-1,k)) +
> yycon3 * (qs(i,j+1,k) - 2.0d0*qs(i,j,k) +
> qs(i,j-1,k)) +
> yycon4 * (vp1*vp1 - 2.0d0*vijk*vijk +
> vm1*vm1) +
> yycon5 * (u(5,i,j+1,k)*rho_i(i,j+1,k) -
> 2.0d0*u(5,i,j,k)*rho_i(i,j,k) +
> u(5,i,j-1,k)*rho_i(i,j-1,k)) -
> ty2 * ((c1*u(5,i,j+1,k) -
> c2*square(i,j+1,k)) * vp1 -
> (c1*u(5,i,j-1,k) -
> c2*square(i,j-1,k)) * vm1)
if(j .eq. 1) then
do m = 1, 5
rhs_(m) = rhs_(m)- dssp *
> ( 5.0d0*u(m,i,j,k) - 4.0d0*u(m,i,j+1,k) +
> u(m,i,j+2,k))
end do
elseif(j .eq. 2) then
do m = 1, 5
rhs_(m) = rhs_(m) - dssp *
> (-4.0d0*u(m,i,j-1,k) + 6.0d0*u(m,i,j,k) -
> 4.0d0*u(m,i,j+1,k) + u(m,i,j+2,k))
end do
elseif(j .ge. 3 .and. j .le. ny2-2) then
do m = 1, 5
rhs_(m) = rhs_(m) - dssp *
> ( u(m,i,j-2,k) - 4.0d0*u(m,i,j-1,k) +
> 6.0*u(m,i,j,k) - 4.0d0*u(m,i,j+1,k) +
> u(m,i,j+2,k) )
end do
elseif(j .eq. ny2-1) then
do m = 1, 5
rhs_(m) = rhs_(m) - dssp *
> ( u(m,i,j-2,k) - 4.0d0*u(m,i,j-1,k) +
> 6.0d0*u(m,i,j,k) - 4.0d0*u(m,i,j+1,k) )
end do
elseif(j .eq. ny2) then
do m = 1, 5
rhs_(m) = rhs_(m) - dssp *
> ( u(m,i,j-2,k) - 4.d0*u(m,i,j-1,k) +
> 5.d0*u(m,i,j,k) )
end do
endif
wijk = ws(i,j,k)
wp1 = ws(i,j,k+1)
wm1 = ws(i,j,k-1)
rhs_(1) = rhs_(1) + dz1tz1 *
> (u(1,i,j,k+1) - 2.0d0*u(1,i,j,k) +
> u(1,i,j,k-1)) -
> tz2 * (u(4,i,j,k+1) - u(4,i,j,k-1))
rhs_(2) = rhs_(2) + dz2tz1 *
> (u(2,i,j,k+1) - 2.0d0*u(2,i,j,k) +
> u(2,i,j,k-1)) +
> zzcon2 * (us(i,j,k+1) - 2.0d0*us(i,j,k) +
> us(i,j,k-1)) -
> tz2 * (u(2,i,j,k+1)*wp1 -
> u(2,i,j,k-1)*wm1)
rhs_(3) = rhs_(3) + dz3tz1 *
> (u(3,i,j,k+1) - 2.0d0*u(3,i,j,k) +
> u(3,i,j,k-1)) +
> zzcon2 * (vs(i,j,k+1) - 2.0d0*vs(i,j,k) +
> vs(i,j,k-1)) -
> tz2 * (u(3,i,j,k+1)*wp1 -
> u(3,i,j,k-1)*wm1)
rhs_(4) = rhs_(4) + dz4tz1 *
> (u(4,i,j,k+1) - 2.0d0*u(4,i,j,k) +
> u(4,i,j,k-1)) +
> zzcon2*con43 * (wp1 - 2.0d0*wijk + wm1) -
> tz2 * (u(4,i,j,k+1)*wp1 -
> u(4,i,j,k-1)*wm1 +
> (u(5,i,j,k+1) - square(i,j,k+1) -
> u(5,i,j,k-1) + square(i,j,k-1))
> *c2)
rhs_(5) = rhs_(5) + dz5tz1 *
> (u(5,i,j,k+1) - 2.0d0*u(5,i,j,k) +
> u(5,i,j,k-1)) +
> zzcon3 * (qs(i,j,k+1) - 2.0d0*qs(i,j,k) +
> qs(i,j,k-1)) +
> zzcon4 * (wp1*wp1 - 2.0d0*wijk*wijk +
> wm1*wm1) +
> zzcon5 * (u(5,i,j,k+1)*rho_i(i,j,k+1) -
> 2.0d0*u(5,i,j,k)*rho_i(i,j,k) +
> u(5,i,j,k-1)*rho_i(i,j,k-1)) -
> tz2 * ( (c1*u(5,i,j,k+1) -
> c2*square(i,j,k+1))*wp1 -
> (c1*u(5,i,j,k-1) -
> c2*square(i,j,k-1))*wm1)
if(k .eq. 1) then
do m = 1, 5
rhs_(m) = rhs_(m)- dssp *
> ( 5.0d0*u(m,i,j,k) - 4.0d0*u(m,i,j,k+1) +
> u(m,i,j,k+2))
end do
elseif(k .eq. 2) then
do m = 1, 5
rhs_(m) = rhs_(m) - dssp *
> (-4.0d0*u(m,i,j,k-1) + 6.0d0*u(m,i,j,k) -
> 4.0d0*u(m,i,j,k+1) + u(m,i,j,k+2))
end do
elseif(k .ge. 3 .and. k .le. nz2-2) then
do m = 1, 5
rhs_(m) = rhs_(m) - dssp *
> ( u(m,i,j,k-2) - 4.0d0*u(m,i,j,k-1) +
> 6.0*u(m,i,j,k) - 4.0d0*u(m,i,j,k+1) +
> u(m,i,j,k+2) )
end do
elseif(k .eq. nz2-1) then
do m = 1, 5
rhs_(m) = rhs_(m) - dssp *
> ( u(m,i,j,k-2) - 4.0d0*u(m,i,j,k-1) +
> 6.0d0*u(m,i,j,k) - 4.0d0*u(m,i,j,k+1) )
end do
else
do m = 1, 5
rhs_(m) = rhs_(m) - dssp *
> ( u(m,i,j,k-2) - 4.d0*u(m,i,j,k-1) +
> 5.d0*u(m,i,j,k) )
end do
endif
rhs_(1) = rhs_(1) * dt
rhs_(2) = rhs_(2) * dt
rhs_(3) = rhs_(3) * dt
rhs_(4) = rhs_(4) * dt
rhs_(5) = rhs_(5) * dt
rhs(1,i,j,k) = rhs_(1)
rhs(2,i,j,k) = rhs_(2)
rhs(3,i,j,k) = rhs_(3)
rhs(4,i,j,k) = rhs_(4)
rhs(5,i,j,k) = rhs_(5)
if(aditional_comp .eq. 1) then
ru1 = rho_i(i,j,k)
uu = us(i,j,k)
vv = vs(i,j,k)
ww = ws(i,j,k)
ac = speed(i,j,k)
ac2inv = ac*ac
t1 = c2 / ac2inv * ( qs(i,j,k)*rhs_(1)-uu*rhs_(2)-
> vv*rhs_(3)- ww*rhs_(4) + rhs_(5) )
t2 = bt * ru1 * ( uu * rhs_(1) - rhs_(2) )
t3 = ( bt * ru1 * ac ) * t1
rhs(1,i,j,k) = rhs_(1) - t1
rhs(2,i,j,k) = - ru1 * ( ww*rhs_(1) - rhs_(4))
rhs(3,i,j,k) = ru1 * ( vv*rhs_(1) - rhs_(3))
rhs(4,i,j,k) = - t2 + t3
rhs(5,i,j,k) = t2 + t3
endif
end do
end do
end do
!DVM$ end region
if (timeron) call timer_stop(t_rhs)
return
end

View File

@@ -0,0 +1,307 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine exact_rhs
c---------------------------------------------------------------------
c---------------------------------------------------------------------
include 'header.h'
double precision dtemp(5), xi, eta, zeta, dtpp
integer m, i, j, k, ip1, im1, jp1, p, p1,
> jm1, km1, kp1,z
double precision ue_(-2:2, 5), buf_(-2:2, 5),cuf_(-2:2),q_(-2:2)
!DVM$ region
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(m)
do k= 0, problem_size-1
do j = 0, problem_size-1
do i = 0, problem_size-1
do m = 1, 5
forcing(m,i,j,k) = 0.0d0
end do
end do
end do
end do
c---------------------------------------------------------------------
c xi-direction flux differences
c---------------------------------------------------------------------
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(zeta,eta,xi,m,dtemp
!DVM$& ,buf_,cuf_,q_,dtpp,z,ue_)
do k = 1, problem_size-2
do j = 1, problem_size-2
do i = 1, problem_size-2
zeta = dble(k) * dnzm1
eta = dble(j) * dnym1
do z = -2, 2
xi = dble(i + z) * dnxm1
do m = 1, 5
dtemp(m) = ce(m,1) +
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
do m = 1, 5
ue_(z,m) = dtemp(m)
end do
dtpp = 1.0d0 / dtemp(1)
do m = 2, 5
buf_(z, m) = dtpp * dtemp(m)
end do
cuf_(z) = buf_(z,2) * buf_(z,2)
buf_(z,1) = cuf_(z) + buf_(z,3) * buf_(z,3) +
> buf_(z,4) * buf_(z,4)
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)*
> ue_(z,3) + buf_(z,4)*ue_(z,4))
enddo
forcing(1,i,j,k) = forcing(1,i,j,k) -
> tx2*( ue_(1,2)-ue_(-1,2) )+
> dx1tx1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
forcing(2,i,j,k) = forcing(2,i,j,k) - tx2 * (
> (ue_(1,2)*buf_(1,2)+c2*(ue_(1,5)-q_(1)))-
> (ue_(-1,2)*buf_(-1,2)+c2*(ue_(-1,5)-q_(-1))))+
> xxcon1*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
> dx2tx1*( ue_(1,2)-2.0d0* ue_(0,2)+ue_(-1,2))
forcing(3,i,j,k) = forcing(3,i,j,k) - tx2 * (
> ue_(1,3)*buf_(1,2)-ue_(-1,3)*buf_(-1,2))+
> xxcon2*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
> dx3tx1*( ue_(1,3)-2.0d0*ue_(0,3) +ue_(-1,3))
forcing(4,i,j,k) = forcing(4,i,j,k) - tx2*(
> ue_(1,4)*buf_(1,2)-ue_(-1,4)*buf_(-1,2))+
> xxcon2*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
> dx4tx1*( ue_(1,4)-2.0d0* ue_(0,4)+ ue_(-1,4))
forcing(5,i,j,k) = forcing(5,i,j,k) - tx2*(
> buf_(1,2)*(c1*ue_(1,5)-c2*q_(1))-
> buf_(-1,2)*(c1*ue_(-1,5)-c2*q_(-1)))+
> 0.5d0*xxcon3*(buf_(1,1)-2.0d0*buf_(0,1)+
> buf_(-1,1))+
> xxcon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
> xxcon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
> dx5tx1*( ue_(1,5)-2.0d0* ue_(0,5)+ ue_(-1,5))
do m = 1, 5
if(i .eq. 1) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
else if(i .eq. 2) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
> 4.0d0*ue_(1,m) + ue_(2,m))
else if(i .eq. problem_size-3) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
else if(i .eq. problem_size-2) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
else
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp*
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
endif
end do
end do
end do
end do
c---------------------------------------------------------------------
c eta-direction flux differences
c---------------------------------------------------------------------
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(zeta,eta,xi,m,dtemp
!DVM$& ,buf_,cuf_,q_,dtpp,z,ue_)
do k = 1, problem_size- 2
do j = 1, problem_size-2
do i = 1, problem_size- 2
zeta = dble(k) * dnzm1
xi = dble(i) * dnxm1
do z = -2, 2
eta = dble(j + z) * dnym1
do m = 1, 5
dtemp(m) = ce(m,1) +
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
do m = 1, 5
ue_(z,m) = dtemp(m)
end do
dtpp = 1.0d0 / dtemp(1)
do m = 2, 5
buf_(z, m) = dtpp * dtemp(m)
end do
cuf_(z) = buf_(z,3) * buf_(z,3)
buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) +
> buf_(z,4) * buf_(z,4)
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)
> *ue_(z,3) + buf_(z,4) * ue_(z,4))
enddo
forcing(1,i,j,k) = forcing(1,i,j,k) -
> ty2*( ue_(1,3)-ue_(-1,3) )+
> dy1ty1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
forcing(2,i,j,k) = forcing(2,i,j,k) - ty2*(
> ue_(1,2)*buf_(1,3)-ue_(-1,2)*buf_(-1,3))+
> yycon2*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
> dy2ty1*( ue_(1,2)-2.0* ue_(0,2)+ ue_(-1,2))
forcing(3,i,j,k) = forcing(3,i,j,k) - ty2*(
> (ue_(1,3)*buf_(1,3)+c2*(ue_(1,5)-q_(1)))-
> (ue_(-1,3)*buf_(-1,3)+c2*(ue_(-1,5)-q_(-1))))+
> yycon1*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
> dy3ty1*( ue_(1,3)-2.0d0*ue_(0,3) +ue_(-1,3))
forcing(4,i,j,k) = forcing(4,i,j,k) - ty2*(
> ue_(1,4)*buf_(1,3)-ue_(-1,4)*buf_(-1,3))+
> yycon2*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
> dy4ty1*( ue_(1,4)-2.0d0*ue_(0,4)+ ue_(-1,4))
forcing(5,i,j,k) = forcing(5,i,j,k) - ty2*(
> buf_(1,3)*(c1*ue_(1,5)-c2*q_(1))-
> buf_(-1,3)*(c1*ue_(-1,5)-c2*q_(-1)))+
> 0.5d0*yycon3*(buf_(1,1)-2.0d0*buf_(0,1)+
> buf_(-1,1))+
> yycon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
> yycon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
> dy5ty1*(ue_(1,5)-2.0d0*ue_(0,5)+ue_(-1,5))
do m = 1, 5
if(j .eq. 1) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
else if(j .eq. 2) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
> 4.0d0*ue_(1,m) + ue_(2,m))
else if(j .eq. problem_size-3) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
else if(j .eq. problem_size-2) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
else
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp*
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
endif
end do
end do
end do
end do
c---------------------------------------------------------------------
c zeta-direction flux differences
c---------------------------------------------------------------------
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(zeta,eta,xi,m
!DVM$& ,buf_,cuf_,q_,ue_,dtpp,dtemp,z)
do k = 1, problem_size-2
do j = 1, problem_size-2
do i = 1, problem_size-2
xi = dble(i) * dnxm1
eta = dble(j) * dnym1
do z = -2, 2
zeta = dble(k + z) * dnzm1
do m = 1, 5
dtemp(m) = ce(m,1) +
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) + xi*ce(m,11)))) +
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9) + eta*ce(m,12))))+
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
do m = 1, 5
ue_(z,m) = dtemp(m)
end do
dtpp = 1.0d0 / dtemp(1)
do m = 2, 5
buf_(z, m) = dtpp * dtemp(m)
end do
cuf_(z) = buf_(z,4) * buf_(z,4)
buf_(z,1) = cuf_(z) + buf_(z,2) * buf_(z,2) +
> buf_(z,3) * buf_(z,3)
q_(z) = 0.5d0*(buf_(z,2)*ue_(z,2) + buf_(z,3)*
> ue_(z,3) + buf_(z,4)*ue_(z,4))
enddo
forcing(1,i,j,k) = forcing(1,i,j,k) -
> tz2*( ue_(1,4)-ue_(-1,4) )+
> dz1tz1*(ue_(1,1)-2.0d0*ue_(0,1)+ue_(-1,1))
forcing(2,i,j,k) = forcing(2,i,j,k) - tz2 * (
> ue_(1,2)*buf_(1,4)-ue_(-1,2)*buf_(-1,4))+
> zzcon2*(buf_(1,2)-2.0d0*buf_(0,2)+buf_(-1,2))+
> dz2tz1*( ue_(1,2)-2.0d0* ue_(0,2)+ ue_(-1,2))
forcing(3,i,j,k) = forcing(3,i,j,k) - tz2 * (
> ue_(1,3)*buf_(1,4)-ue_(-1,3)*buf_(-1,4))+
> zzcon2*(buf_(1,3)-2.0d0*buf_(0,3)+buf_(-1,3))+
> dz3tz1*(ue_(1,3)-2.0d0*ue_(0,3)+ue_(-1,3))
forcing(4,i,j,k) = forcing(4,i,j,k) - tz2 * (
> (ue_(1,4)*buf_(1,4)+c2*(ue_(1,5)-q_(1)))-
> (ue_(-1,4)*buf_(-1,4)+c2*(ue_(-1,5)-q_(-1))))+
> zzcon1*(buf_(1,4)-2.0d0*buf_(0,4)+buf_(-1,4))+
> dz4tz1*( ue_(1,4)-2.0d0*ue_(0,4) +ue_(-1,4))
forcing(5,i,j,k) = forcing(5,i,j,k) - tz2 * (
> buf_(1,4)*(c1*ue_(1,5)-c2*q_(1))-
> buf_(-1,4)*(c1*ue_(-1,5)-c2*q_(-1)))+
> 0.5d0*zzcon3*(buf_(1,1)-2.0d0*buf_(0,1)
> +buf_(-1,1))+
> zzcon4*(cuf_(1)-2.0d0*cuf_(0)+cuf_(-1))+
> zzcon5*(buf_(1,5)-2.0d0*buf_(0,5)+buf_(-1,5))+
> dz5tz1*( ue_(1,5)-2.0d0*ue_(0,5)+ ue_(-1,5))
do m = 1, 5
if(k .eq. 1) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (5.0d0*ue_(0,m) - 4.0d0*ue_(1,m) +ue_(2,m))
else if(k .eq. 2) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (-4.0d0*ue_(-1,m) + 6.0d0*ue_(0,m) -
> 4.0d0*ue_(1,m) + ue_(2,m))
else if(k .eq. problem_size-3) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m))
else if(k .eq. problem_size-2) then
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp *
> (ue_(-2,m) - 4.0d0*ue_(-1,m) + 5.0d0*ue_(0,m))
else
forcing(m,i,j,k) = forcing(m,i,j,k) - dssp*
> (ue_(-2,m) - 4.0d0*ue_(-1,m) +
> 6.0d0*ue_(0,m) - 4.0d0*ue_(1,m) + ue_(2,m))
endif
end do
end do
end do
end do
c---------------------------------------------------------------------
c now change the sign of the forcing function,
c---------------------------------------------------------------------
!DVM$ parallel (k,j,i) on forcing(*,i,j,k),private(m)
do k = 1, problem_size-2
do j = 1, problem_size-2
do i = 1, problem_size-2
do m = 1, 5
forcing(m,i,j,k) = -1.d0 * forcing(m,i,j,k)
end do
end do
end do
end do
!DVM$ end region
return
end

View File

@@ -0,0 +1,120 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
c---------------------------------------------------------------------
c The following include file is generated automatically by the
c "setparams" utility. It defines
c problem_size: 12, 64, 102, 162 (for class T, A, B, C)
c dt_default: default time step for this problem size if no
c config file
c niter_default: default number of iterations for this problem size
c---------------------------------------------------------------------
include 'npbparams.h'
integer grid_points(3), nx2, ny2, nz2,stage_n
common /global/ grid_points, nx2, ny2, nz2, timeron
double precision tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3,
& dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4,
& dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt,
& ce(5,13), dxmax, dymax, dzmax, xxcon1, xxcon2,
& xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1,
& dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4,
& yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1,
& zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1,
& dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1,
& dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2,
& c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt,
& dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1,
& c2dtty1, c2dttz1, comz1, comz4, comz5, comz6,
& c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16
common /constants/ tx1, tx2, tx3, ty1, ty2, ty3, tz1, tz2, tz3,
& dx1, dx2, dx3, dx4, dx5, dy1, dy2, dy3, dy4,
& dy5, dz1, dz2, dz3, dz4, dz5, dssp, dt,
& ce, dxmax, dymax, dzmax, xxcon1, xxcon2,
& xxcon3, xxcon4, xxcon5, dx1tx1, dx2tx1, dx3tx1,
& dx4tx1, dx5tx1, yycon1, yycon2, yycon3, yycon4,
& yycon5, dy1ty1, dy2ty1, dy3ty1, dy4ty1, dy5ty1,
& zzcon1, zzcon2, zzcon3, zzcon4, zzcon5, dz1tz1,
& dz2tz1, dz3tz1, dz4tz1, dz5tz1, dnxm1, dnym1,
& dnzm1, c1c2, c1c5, c3c4, c1345, conz1, c1, c2,
& c3, c4, c5, c4dssp, c5dssp, dtdssp, dttx1, bt,
& dttx2, dtty1, dtty2, dttz1, dttz2, c2dttx1,
& c2dtty1, c2dttz1, comz1, comz4, comz5, comz6,
& c3c4tx3, c3c4ty3, c3c4tz3, c2iv, con43, con16,
& stage_n
integer IMAX, JMAX, KMAX, IMAXP, JMAXP
parameter (IMAX=problem_size,JMAX=problem_size,KMAX=problem_size)
parameter (IMAXP=IMAX/2*2,JMAXP=JMAX/2*2)
c---------------------------------------------------------------------
c To improve cache performance, first two dimensions padded by 1
c for even number sizes only
c---------------------------------------------------------------------
double precision
& u (5, 0:IMAXP, 0:JMAXP, 0:KMAX),
& us ( 0:IMAXP, 0:JMAXP, 0:KMAX),
& vs ( 0:IMAXP, 0:JMAXP, 0:KMAX),
& ws ( 0:IMAXP, 0:JMAXP, 0:KMAX),
& qs ( 0:IMAXP, 0:JMAXP, 0:KMAX),
& rho_i ( 0:IMAXP, 0:JMAXP, 0:KMAX),
& speed ( 0:IMAXP, 0:JMAXP, 0:KMAX),
& square ( 0:IMAXP, 0:JMAXP, 0:KMAX),
& rhs (5, 0:IMAXP, 0:JMAXP, 0:KMAX),
& forcing (5, 0:IMAXP, 0:JMAXP, 0:KMAX)
common /fields/ u, us, vs, ws, qs, rho_i, speed, square,
& rhs, forcing
double precision cv(0:problem_size-1), rhon(0:problem_size-1),
& rhos(0:problem_size-1), rhoq(0:problem_size-1),
& cuf(0:problem_size-1), q(0:problem_size-1),
& ue(0:problem_size-1,5), buf(0:problem_size-1,5),
& rhon_(0:problem_size-1,0:problem_size-1),
& cv_(0:problem_size-1,0:problem_size-1)
common /work_1d/ cv,rhon,rhos,rhoq, cuf, q, ue, buf,rhon_,cv_
double precision
& lhs(0:2,1:5,0:IMAXP, 0:JMAXP, 0:KMAX)
common /work_lhs/ lhs
c-----------------------------------------------------------------------
c Timer constants
c-----------------------------------------------------------------------
integer t_rhsx,t_rhsy,t_rhsz,t_xsolve,t_ysolve,t_zsolve,
& t_rdis1,t_rdis2,t_tzetar,t_ninvr,t_pinvr,t_add,
& t_rhs,t_txinvr,t_last,t_total
logical timeron
parameter (t_total = 1)
parameter (t_rhsx = 2)
parameter (t_rhsy = 3)
parameter (t_rhsz = 4)
parameter (t_rhs = 5)
parameter (t_xsolve = 6)
parameter (t_ysolve = 7)
parameter (t_zsolve = 8)
parameter (t_rdis1 = 9)
parameter (t_rdis2 = 10)
parameter (t_txinvr = 11)
parameter (t_pinvr = 12)
parameter (t_ninvr = 13)
parameter (t_tzetar = 14)
parameter (t_add = 15)
parameter (t_last = 15)
!DVM$ SHADOW lhs(0:0,0:0,2:2,2:2,2:2)
!DVM$ SHADOW (0:0,2:3,2:3,2:3) :: rhs,forcing,u
!DVM$ SHADOW (2:3,2:3,2:3) :: qs,us,ws,vs,speed,square,rho_i
!DVM$ DISTRIBUTE u(*,BLOCK,BLOCK,BLOCK)
!DVM$ ALIGN (*,i,j,k) WITH u(*,i,j,k) :: forcing,rhs
!DVM$ ALIGN (*,*,i,j,k) WITH u(*,i,j,k) :: lhs
!DVM$ ALIGN (i,j,k) WITH u(*,i,j,k) :: square,speed,rho_i,qs,ws,vs,us

View File

@@ -0,0 +1,189 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine initialize
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c This subroutine initializes the field variable u using
c tri-linear transfinite interpolation of the boundary values
c---------------------------------------------------------------------
include 'header.h'
integer i, j, k, m, ix, iy, iz
double precision xi, eta, zeta, Pface(5,3,2), Pxi, Peta,
> Pzeta, temp(5)
!DVM$ region
!DVM$ parallel (k,j,i) on u(*,i,j,k), private(zeta, eta, xi, ix, pxi, m,
!DVM$& pface, iy, peta, iz, pzeta, temp)
do k = 0, problem_size-1
do j = 0, problem_size-1
do i = 0, problem_size-1
u(1,i,j,k) = 1.0
u(2,i,j,k) = 0.0
u(3,i,j,k) = 0.0
u(4,i,j,k) = 0.0
u(5,i,j,k) = 1.0
zeta = dble(k) * dnzm1
eta = dble(j) * dnym1
xi = dble(i) * dnxm1
do ix = 1, 2
Pxi = dble(ix-1)
do m = 1, 5
Pface(m,1,ix) = ce(m,1) +
> Pxi*(ce(m,2)+Pxi*(ce(m,5) +Pxi*(ce(m,8) +
> Pxi*ce(m,11))))+eta*(ce(m,3) + eta*(ce(m,6)
> + eta*(ce(m,9) + eta*ce(m,12))))+
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
end do
do iy = 1, 2
Peta = dble(iy-1)
do m = 1, 5
Pface(m,2,iy) = ce(m,1) +
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) +
> xi*ce(m,11)))) +
> Peta*(ce(m,3) +Peta*(ce(m,6) +Peta*(ce(m,9)+
> Peta*ce(m,12))))+
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
end do
do iz = 1, 2
Pzeta = dble(iz-1)
do m = 1, 5
Pface(m,3,iz) = ce(m,1) +
> xi*(ce(m,2) + xi*(ce(m,5) + xi*(ce(m,8) +
> xi*ce(m,11)))) +
> eta*(ce(m,3) + eta*(ce(m,6) + eta*(ce(m,9)+
> eta*ce(m,12))))+
> Pzeta*(ce(m,4) + Pzeta*(ce(m,7) + Pzeta*(ce(m,10) +
> Pzeta*ce(m,13))))
end do
end do
do m = 1, 5
Pxi = xi * Pface(m,1,2) +
> (1.0d0-xi) * Pface(m,1,1)
Peta = eta * Pface(m,2,2) +
> (1.0d0-eta) * Pface(m,2,1)
Pzeta = zeta * Pface(m,3,2) +
> (1.0d0-zeta) * Pface(m,3,1)
u(m,i,j,k) = Pxi + Peta + Pzeta -
> Pxi*Peta - Pxi*Pzeta - Peta*Pzeta +
> Pxi*Peta*Pzeta
end do
zeta = dble(k) * dnzm1
eta = dble(j) * dnym1
xi = 0.0d0
if( i .eq. 0) then
! call exact_solution(xi, eta, zeta, temp)
do m = 1, 5
temp(m) = ce(m,1) +
> xi*(ce(m,2)+xi*(ce(m,5)+xi*(ce(m,8)+xi*ce(m,11))))+
> eta*(ce(m,3)+eta*(ce(m,6)+eta*(ce(m,9)+eta*ce(m,12))))+
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
do m = 1, 5
u(m,i,j,k) = temp(m)
end do
endif
xi = 1.0d0
if( i .eq. problem_size-1) then
! call exact_solution(xi, eta, zeta, temp)
do m = 1, 5
temp(m) = ce(m,1) +
> xi*(ce(m,2)+xi*(ce(m,5)+xi*(ce(m,8)+xi*ce(m,11))))+
> eta*(ce(m,3)+eta*(ce(m,6)+eta*(ce(m,9)+eta*ce(m,12))))+
> zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
do m = 1, 5
u(m,i,j,k) = temp(m)
end do
endif
zeta = dble(k) * dnzm1
eta = 0.0d0
xi = dble(i) * dnxm1
if( j .eq. 0) then
! call exact_solution(xi, eta, zeta, temp)
do m = 1, 5
temp(m) = ce(m,1) +
> xi*(ce(m,2)+xi*(ce(m,5)+xi*(ce(m,8)+xi*ce(m,11))))+
> eta*(ce(m,3)+eta*(ce(m,6)+eta*(ce(m,9)+eta*ce(m,12))))
> +zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
do m = 1, 5
u(m,i,j,k) = temp(m)
end do
endif
eta = 1.0d0
if( j .eq. problem_size-1) then
! call exact_solution(xi, eta, zeta, temp)
do m = 1, 5
temp(m) = ce(m,1) +
> xi*(ce(m,2)+xi*(ce(m,5)+xi*(ce(m,8)+xi*ce(m,11))))+
> eta*(ce(m,3)+eta*(ce(m,6)+eta*(ce(m,9)+eta*ce(m,12))))
> +zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
do m = 1, 5
u(m,i,j,k) = temp(m)
end do
endif
zeta = 0.0d0
eta = dble(j) * dnym1
xi = dble(i) *dnxm1
if( k .eq. 0) then
! call exact_solution(xi, eta, zeta, temp)
do m = 1, 5
temp(m) = ce(m,1) +
> xi*(ce(m,2)+xi*(ce(m,5)+xi*(ce(m,8)+xi*ce(m,11))))+
> eta*(ce(m,3)+eta*(ce(m,6)+eta*(ce(m,9)+eta*ce(m,12))))
> +zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
do m = 1, 5
u(m,i,j,k) = temp(m)
end do
endif
zeta = 1.0d0
if( k .eq. problem_size-1) then
! call exact_solution(xi, eta, zeta, temp)
do m = 1, 5
temp(m) = ce(m,1) +
> xi*(ce(m,2)+xi*(ce(m,5)+xi*(ce(m,8)+xi*ce(m,11))))+
> eta*(ce(m,3)+eta*(ce(m,6)+eta*(ce(m,9)+eta*ce(m,12))))
> +zeta*(ce(m,4) + zeta*(ce(m,7) + zeta*(ce(m,10) +
> zeta*ce(m,13))))
end do
do m = 1, 5
u(m,i,j,k) = temp(m)
end do
endif
end do
end do
end do
!DVM$ end region
return
end

View File

@@ -0,0 +1,121 @@
subroutine print_results(name, class, n1, n2, n3, niter,
> t, mops, optype, verified, npbversion,
> compiletime, cs1, cs2, cs3, cs4, cs5, cs6, cs7)
implicit none
character name*(*)
character class*1
integer n1, n2, n3, niter, j
double precision t, mops
character optype*24, size*15
logical verified
character*(*) npbversion, compiletime,
> cs1, cs2, cs3, cs4, cs5, cs6, cs7
integer num_threads, max_threads, i
max_threads = 1
num_threads = 1
write (*, 2) name
2 format(//, ' ', A, ' Benchmark Completed.')
write (*, 3) Class
3 format(' Class = ', 12x, a12)
c If this is not a grid-based problem (EP, FT, CG), then
c we only print n1, which contains some measure of the
c problem size. In that case, n2 and n3 are both zero.
c Otherwise, we print the grid size n1xn2xn3
if ((n2 .eq. 0) .and. (n3 .eq. 0)) then
if (name(1:2) .eq. 'EP') then
write(size, '(f15.0)' ) 2.d0**n1
j = 15
if (size(j:j) .eq. '.') j = j - 1
write (*,42) size(1:j)
42 format(' Size = ',9x, a15)
else
write (*,44) n1
44 format(' Size = ',12x, i12)
endif
else
write (*, 4) n1,n2,n3
4 format(' Size = ',9x, i4,'x',i4,'x',i4)
endif
write (*, 5) niter
5 format(' Iterations = ', 12x, i12)
write (*, 6) t
6 format(' Time in seconds = ',12x, f12.2)
write (*,7) num_threads
7 format(' Total threads = ', 12x, i12)
write (*,8) max_threads
8 format(' Avail threads = ', 12x, i12)
if (num_threads .ne. max_threads) write (*,88)
88 format(' Warning: Threads used differ from threads available')
write (*,9) mops
9 format(' Mop/s total = ',12x, f12.2)
write (*,10) mops/float( num_threads )
10 format(' Mop/s/thread = ', 12x, f12.2)
write(*, 11) optype
11 format(' Operation type = ', a24)
if (verified) then
write(*,12) ' SUCCESSFUL'
else
write(*,12) 'UNSUCCESSFUL'
endif
12 format(' Verification = ', 12x, a)
write(*,13) npbversion
13 format(' Version = ', 12x, a12)
write(*,14) compiletime
14 format(' Compile date = ', 12x, a12)
write (*,121) cs1
121 format(/, ' Compile options:', /,
> ' F77 = ', A)
write (*,122) cs2
122 format(' FLINK = ', A)
write (*,123) cs3
123 format(' F_LIB = ', A)
write (*,124) cs4
124 format(' F_INC = ', A)
write (*,125) cs5
125 format(' FFLAGS = ', A)
write (*,126) cs6
126 format(' FLINKFLAGS = ', A)
write(*, 127) cs7
127 format(' RAND = ', A)
write (*,130)
130 format(//' Please send all errors/feedbacks to:'//
> ' NPB Development Team'/
> ' npb@nas.nasa.gov'//)
c 130 format(//' Please send the results of this run to:'//
c > ' NPB Development Team '/
c > ' Internet: npb@nas.nasa.gov'/
c > ' '/
c > ' If email is not available, send this to:'//
c > ' MS T27A-1'/
c > ' NASA Ames Research Center'/
c > ' Moffett Field, CA 94035-1000'//
c > ' Fax: 650-604-3957'//)
return
end

View File

@@ -0,0 +1,202 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine set_constants
c---------------------------------------------------------------------
c---------------------------------------------------------------------
include 'header.h'
ce(1,1) = 2.0d0
ce(1,2) = 0.0d0
ce(1,3) = 0.0d0
ce(1,4) = 4.0d0
ce(1,5) = 5.0d0
ce(1,6) = 3.0d0
ce(1,7) = 0.5d0
ce(1,8) = 0.02d0
ce(1,9) = 0.01d0
ce(1,10) = 0.03d0
ce(1,11) = 0.5d0
ce(1,12) = 0.4d0
ce(1,13) = 0.3d0
ce(2,1) = 1.0d0
ce(2,2) = 0.0d0
ce(2,3) = 0.0d0
ce(2,4) = 0.0d0
ce(2,5) = 1.0d0
ce(2,6) = 2.0d0
ce(2,7) = 3.0d0
ce(2,8) = 0.01d0
ce(2,9) = 0.03d0
ce(2,10) = 0.02d0
ce(2,11) = 0.4d0
ce(2,12) = 0.3d0
ce(2,13) = 0.5d0
ce(3,1) = 2.0d0
ce(3,2) = 2.0d0
ce(3,3) = 0.0d0
ce(3,4) = 0.0d0
ce(3,5) = 0.0d0
ce(3,6) = 2.0d0
ce(3,7) = 3.0d0
ce(3,8) = 0.04d0
ce(3,9) = 0.03d0
ce(3,10) = 0.05d0
ce(3,11) = 0.3d0
ce(3,12) = 0.5d0
ce(3,13) = 0.4d0
ce(4,1) = 2.0d0
ce(4,2) = 2.0d0
ce(4,3) = 0.0d0
ce(4,4) = 0.0d0
ce(4,5) = 0.0d0
ce(4,6) = 2.0d0
ce(4,7) = 3.0d0
ce(4,8) = 0.03d0
ce(4,9) = 0.05d0
ce(4,10) = 0.04d0
ce(4,11) = 0.2d0
ce(4,12) = 0.1d0
ce(4,13) = 0.3d0
ce(5,1) = 5.0d0
ce(5,2) = 4.0d0
ce(5,3) = 3.0d0
ce(5,4) = 2.0d0
ce(5,5) = 0.1d0
ce(5,6) = 0.4d0
ce(5,7) = 0.3d0
ce(5,8) = 0.05d0
ce(5,9) = 0.04d0
ce(5,10) = 0.03d0
ce(5,11) = 0.1d0
ce(5,12) = 0.3d0
ce(5,13) = 0.2d0
c1 = 1.4d0
c2 = 0.4d0
c3 = 0.1d0
c4 = 1.0d0
c5 = 1.4d0
bt = dsqrt(0.5d0)
dnxm1 = 1.0d0 / dble(problem_size-1)
dnym1 = 1.0d0 / dble(problem_size-1)
dnzm1 = 1.0d0 / dble(problem_size-1)
c1c2 = c1 * c2
c1c5 = c1 * c5
c3c4 = c3 * c4
c1345 = c1c5 * c3c4
conz1 = (1.0d0-c1c5)
tx1 = 1.0d0 / (dnxm1 * dnxm1)
tx2 = 1.0d0 / (2.0d0 * dnxm1)
tx3 = 1.0d0 / dnxm1
ty1 = 1.0d0 / (dnym1 * dnym1)
ty2 = 1.0d0 / (2.0d0 * dnym1)
ty3 = 1.0d0 / dnym1
tz1 = 1.0d0 / (dnzm1 * dnzm1)
tz2 = 1.0d0 / (2.0d0 * dnzm1)
tz3 = 1.0d0 / dnzm1
dx1 = 0.75d0
dx2 = 0.75d0
dx3 = 0.75d0
dx4 = 0.75d0
dx5 = 0.75d0
dy1 = 0.75d0
dy2 = 0.75d0
dy3 = 0.75d0
dy4 = 0.75d0
dy5 = 0.75d0
dz1 = 1.0d0
dz2 = 1.0d0
dz3 = 1.0d0
dz4 = 1.0d0
dz5 = 1.0d0
dxmax = dmax1(dx3, dx4)
dymax = dmax1(dy2, dy4)
dzmax = dmax1(dz2, dz3)
dssp = 0.25d0 * dmax1(dx1, dmax1(dy1, dz1) )
c4dssp = 4.0d0 * dssp
c5dssp = 5.0d0 * dssp
dttx1 = dt*tx1
dttx2 = dt*tx2
dtty1 = dt*ty1
dtty2 = dt*ty2
dttz1 = dt*tz1
dttz2 = dt*tz2
c2dttx1 = 2.0d0*dttx1
c2dtty1 = 2.0d0*dtty1
c2dttz1 = 2.0d0*dttz1
dtdssp = dt*dssp
comz1 = dtdssp
comz4 = 4.0d0*dtdssp
comz5 = 5.0d0*dtdssp
comz6 = 6.0d0*dtdssp
c3c4tx3 = c3c4*tx3
c3c4ty3 = c3c4*ty3
c3c4tz3 = c3c4*tz3
dx1tx1 = dx1*tx1
dx2tx1 = dx2*tx1
dx3tx1 = dx3*tx1
dx4tx1 = dx4*tx1
dx5tx1 = dx5*tx1
dy1ty1 = dy1*ty1
dy2ty1 = dy2*ty1
dy3ty1 = dy3*ty1
dy4ty1 = dy4*ty1
dy5ty1 = dy5*ty1
dz1tz1 = dz1*tz1
dz2tz1 = dz2*tz1
dz3tz1 = dz3*tz1
dz4tz1 = dz4*tz1
dz5tz1 = dz5*tz1
c2iv = 2.5d0
con43 = 4.0d0/3.0d0
con16 = 1.0d0/6.0d0
xxcon1 = c3c4tx3*con43*tx3
xxcon2 = c3c4tx3*tx3
xxcon3 = c3c4tx3*conz1*tx3
xxcon4 = c3c4tx3*con16*tx3
xxcon5 = c3c4tx3*c1c5*tx3
yycon1 = c3c4ty3*con43*ty3
yycon2 = c3c4ty3*ty3
yycon3 = c3c4ty3*conz1*ty3
yycon4 = c3c4ty3*con16*ty3
yycon5 = c3c4ty3*c1c5*ty3
zzcon1 = c3c4tz3*con43*tz3
zzcon2 = c3c4tz3*tz3
zzcon3 = c3c4tz3*conz1*tz3
zzcon4 = c3c4tz3*con16*tz3
zzcon5 = c3c4tz3*c1c5*tz3
return
end

View File

@@ -0,0 +1,231 @@
!-------------------------------------------------------------------------!
! !
! N A S P A R A L L E L B E N C H M A R K S 3.3.1 !
! !
! D V M H V E R S I O N !
! !
! S P !
! !
!-------------------------------------------------------------------------!
!-------------------------------------------------------------------------!
c---------------------------------------------------------------------
c
c Authors:
c Original:
c R. Van der Wijngaart
c W. Saphir
c H. Jin
c Optimize for DVMH:
c Kolganov A.S.
c---------------------------------------------------------------------
c---------------------------------------------------------------------
program SP
c---------------------------------------------------------------------
include 'header.h'
integer i, niter, step, fstatus, n3
external timer_read
double precision mflops, t, tmax, timer_read, trecs(t_last)
logical verified
character class
character t_names(t_last)*8
c---------------------------------------------------------------------
c Read input file (if it exists), else take
c defaults from parameters
c---------------------------------------------------------------------
open (unit=2,file='timer.flag',status='old', iostat=fstatus)
if (fstatus .eq. 0) then
timeron = .true.
t_names(t_total) = 'total'
t_names(t_rhsx) = 'rhsx'
t_names(t_rhsy) = 'rhsy'
t_names(t_rhsz) = 'rhsz'
t_names(t_rhs) = 'rhs'
t_names(t_xsolve) = 'xsolve'
t_names(t_ysolve) = 'ysolve'
t_names(t_zsolve) = 'zsolve'
t_names(t_rdis1) = 'redist1'
t_names(t_rdis2) = 'redist2'
t_names(t_tzetar) = 'tzetar'
t_names(t_ninvr) = 'ninvr'
t_names(t_pinvr) = 'pinvr'
t_names(t_txinvr) = 'txinvr'
t_names(t_add) = 'add'
close(2)
else
timeron = .false.
endif
write(*, 1000)
open (unit=2,file='inputsp.data',status='old', iostat=fstatus)
if (fstatus .eq. 0) then
write(*,233)
233 format(' Reading from input file inputsp.data')
read (2,*) niter
read (2,*) dt
read (2,*) grid_points(1), grid_points(2), grid_points(3)
close(2)
else
write(*,234)
niter = niter_default
dt = dt_default
grid_points(1) = problem_size
grid_points(2) = problem_size
grid_points(3) = problem_size
endif
234 format(' No input file inputsp.data. Using compiled defaults')
open (unit = 2,file = 'inputStage',status = 'old',iostat = fstat
&us)
if (fstatus .eq. 0) then
read (unit = 2,fmt = *) stage_n
close (unit = 2)
else
stage_n = 0
endif
write(*,*) 'stage = ', stage_n
write(*, 1001) problem_size, problem_size, problem_size
write(*, 1002) niter, dt
write(*, *)
1000 format(//, ' NAS Parallel Benchmarks (NPB3.3.1-DVMH)',
> ' - SP Benchmark', /)
1001 format(' Size: ', i4, 'x', i4, 'x', i4)
1002 format(' Iterations: ', i4, ' dt: ', F11.7)
1003 format(' Number of available threads: ', i5)
if ( (problem_size .gt. IMAX) .or.
> (problem_size .gt. JMAX) .or.
> (problem_size .gt. KMAX) ) then
print *, (grid_points(i),i=1,3)
print *,' Problem size too big for compiled array sizes'
goto 999
endif
nx2 = problem_size - 2
ny2 = problem_size - 2
nz2 = problem_size - 2
call set_constants
call exact_rhs
call initialize
call adi_first
call adi_first
call initialize
do i = 1, t_last
call timer_clear(i)
end do
call timer_start(1)
!DVM$ BARRIER
do step = 1, niter
if (mod(step, 20) .eq. 0 .or. step .eq. 1) then
write(*, 200) step
200 format(' Time step ', i4)
endif
call adi
end do
call timer_stop(1)
tmax = timer_read(1)
call verify(niter, class, verified)
if( tmax .ne. 0. ) then
n3 = problem_size*problem_size*problem_size
t = (problem_size+problem_size+problem_size)/3.0
mflops = (881.174 * float( n3 )
> -4683.91 * t**2
> +11484.5 * t
> -19272.4) * float( niter ) / (tmax*1000000.0d0)
else
mflops = 0.0
endif
call print_results('SP', class, problem_size,
> problem_size, problem_size, niter,
> tmax, mflops, ' floating point',
> verified, npbversion,compiletime, cs1, cs2, cs3, cs4, cs5,
> cs6, '(none)')
c---------------------------------------------------------------------
c More timers
c---------------------------------------------------------------------
if (.not.timeron) goto 999
do i=1, t_last
trecs(i) = timer_read(i)
end do
if (tmax .eq. 0.0) tmax = 1.0
write(*,800)
800 format(' SECTION Time (secs)')
do i=1, t_last
write(*,810) t_names(i), trecs(i), trecs(i)*100./tmax
if (i.eq.t_rhs) then
t = trecs(t_rhsx) + trecs(t_rhsy) + trecs(t_rhsz)
write(*,820) 'sub-rhs', t, t*100./tmax
t = trecs(t_rhs) - t
write(*,820) 'rest-rhs', t, t*100./tmax
elseif (i.eq.t_zsolve) then
t = trecs(t_zsolve) - trecs(t_rdis1) - trecs(t_rdis2)
write(*,820) 'sub-zsol', t, t*100./tmax
elseif (i.eq.t_rdis2) then
t = trecs(t_rdis1) + trecs(t_rdis2)
write(*,820) 'redist', t, t*100./tmax
endif
810 format(2x,a8,':',f9.3,' (',f6.2,'%)')
820 format(' --> ',a8,':',f9.3,' (',f6.2,'%)')
end do
999 continue
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine adi_first
c---------------------------------------------------------------------
c---------------------------------------------------------------------
call compute_rhs(1)
call x_solve
call y_solve
call z_solve
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine adi
c---------------------------------------------------------------------
c---------------------------------------------------------------------
!DVM$ interval 1
call compute_rhs(1)
!DVM$ end interval
!DVM$ interval 12
call x_solve
!DVM$ end interval
!DVM$ interval 13
call y_solve
!DVM$ end interval
!DVM$ interval 14
call z_solve
!DVM$ end interval
return
end

View File

@@ -0,0 +1,99 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine timer_clear(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
elapsed(n) = 0.0
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine timer_start(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
external elapsed_time
double precision elapsed_time
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
start(n) = elapsed_time()
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine timer_stop(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
external elapsed_time
double precision elapsed_time
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
double precision t, now
now = elapsed_time()
t = now - start(n)
elapsed(n) = elapsed(n) + t
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
double precision function timer_read(n)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
integer n
double precision start(64), elapsed(64)
common /tt/ start, elapsed
timer_read = elapsed(n)
return
end
c---------------------------------------------------------------------
c---------------------------------------------------------------------
double precision function elapsed_time()
c---------------------------------------------------------------------
c---------------------------------------------------------------------
implicit none
double precision t,dvtime
t = dvtime()
elapsed_time = t
return
end

View File

@@ -0,0 +1,356 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine verify(no_time_steps, class, verified)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c verification routine
c---------------------------------------------------------------------
include 'header.h'
double precision xcrref(5),xceref(5),xcrdif(5),xcedif(5),
> epsilon, xce(5), xcr(5), dtref
integer m, no_time_steps
character class
logical verified
c---------------------------------------------------------------------
c tolerance level
c---------------------------------------------------------------------
epsilon = 1.0d-08
c---------------------------------------------------------------------
c compute the error norm and the residual norm, and exit if not printing
c---------------------------------------------------------------------
call error_norm(xce)
call compute_rhs(0)
call rhs_norm(xcr)
do m = 1, 5
xcr(m) = xcr(m) / dt
enddo
class = 'U'
verified = .true.
do m = 1,5
xcrref(m) = 1.0
xceref(m) = 1.0
end do
c---------------------------------------------------------------------
c reference data for 12X12X12 grids after 100 time steps, with DT = 1.50d-02
c---------------------------------------------------------------------
if ( (problem_size .eq. 12 ) .and.
> (problem_size .eq. 12 ) .and.
> (problem_size .eq. 12 ) .and.
> (no_time_steps .eq. 100 )) then
class = 'S'
dtref = 1.5d-2
c---------------------------------------------------------------------
c Reference values of RMS-norms of residual.
c---------------------------------------------------------------------
xcrref(1) = 2.7470315451339479d-02
xcrref(2) = 1.0360746705285417d-02
xcrref(3) = 1.6235745065095532d-02
xcrref(4) = 1.5840557224455615d-02
xcrref(5) = 3.4849040609362460d-02
c---------------------------------------------------------------------
c Reference values of RMS-norms of solution error.
c---------------------------------------------------------------------
xceref(1) = 2.7289258557377227d-05
xceref(2) = 1.0364446640837285d-05
xceref(3) = 1.6154798287166471d-05
xceref(4) = 1.5750704994480102d-05
xceref(5) = 3.4177666183390531d-05
c---------------------------------------------------------------------
c reference data for 36X36X36 grids after 400 time steps, with DT = 1.5d-03
c---------------------------------------------------------------------
elseif ( (problem_size .eq. 36) .and.
> (problem_size .eq. 36) .and.
> (problem_size .eq. 36) .and.
> (no_time_steps . eq. 400) ) then
class = 'W'
dtref = 1.5d-3
c---------------------------------------------------------------------
c Reference values of RMS-norms of residual.
c---------------------------------------------------------------------
xcrref(1) = 0.1893253733584d-02
xcrref(2) = 0.1717075447775d-03
xcrref(3) = 0.2778153350936d-03
xcrref(4) = 0.2887475409984d-03
xcrref(5) = 0.3143611161242d-02
c---------------------------------------------------------------------
c Reference values of RMS-norms of solution error.
c---------------------------------------------------------------------
xceref(1) = 0.7542088599534d-04
xceref(2) = 0.6512852253086d-05
xceref(3) = 0.1049092285688d-04
xceref(4) = 0.1128838671535d-04
xceref(5) = 0.1212845639773d-03
c---------------------------------------------------------------------
c reference data for 64X64X64 grids after 400 time steps, with DT = 1.5d-03
c---------------------------------------------------------------------
elseif ( (problem_size .eq. 64) .and.
> (problem_size .eq. 64) .and.
> (problem_size .eq. 64) .and.
> (no_time_steps . eq. 400) ) then
class = 'A'
dtref = 1.5d-3
c---------------------------------------------------------------------
c Reference values of RMS-norms of residual.
c---------------------------------------------------------------------
xcrref(1) = 2.4799822399300195d0
xcrref(2) = 1.1276337964368832d0
xcrref(3) = 1.5028977888770491d0
xcrref(4) = 1.4217816211695179d0
xcrref(5) = 2.1292113035138280d0
c---------------------------------------------------------------------
c Reference values of RMS-norms of solution error.
c---------------------------------------------------------------------
xceref(1) = 1.0900140297820550d-04
xceref(2) = 3.7343951769282091d-05
xceref(3) = 5.0092785406541633d-05
xceref(4) = 4.7671093939528255d-05
xceref(5) = 1.3621613399213001d-04
c---------------------------------------------------------------------
c reference data for 102X102X102 grids after 400 time steps,
c with DT = 1.0d-03
c---------------------------------------------------------------------
elseif ( (problem_size .eq. 102) .and.
> (problem_size .eq. 102) .and.
> (problem_size .eq. 102) .and.
> (no_time_steps . eq. 400) ) then
class = 'B'
dtref = 1.0d-3
c---------------------------------------------------------------------
c Reference values of RMS-norms of residual.
c---------------------------------------------------------------------
xcrref(1) = 0.6903293579998d+02
xcrref(2) = 0.3095134488084d+02
xcrref(3) = 0.4103336647017d+02
xcrref(4) = 0.3864769009604d+02
xcrref(5) = 0.5643482272596d+02
c---------------------------------------------------------------------
c Reference values of RMS-norms of solution error.
c---------------------------------------------------------------------
xceref(1) = 0.9810006190188d-02
xceref(2) = 0.1022827905670d-02
xceref(3) = 0.1720597911692d-02
xceref(4) = 0.1694479428231d-02
xceref(5) = 0.1847456263981d-01
c---------------------------------------------------------------------
c reference data for 162X162X162 grids after 400 time steps,
c with DT = 0.67d-03
c---------------------------------------------------------------------
elseif ( (problem_size .eq. 162) .and.
> (problem_size .eq. 162) .and.
> (problem_size .eq. 162) .and.
> (no_time_steps . eq. 400) ) then
class = 'C'
dtref = 0.67d-3
c---------------------------------------------------------------------
c Reference values of RMS-norms of residual.
c---------------------------------------------------------------------
xcrref(1) = 0.5881691581829d+03
xcrref(2) = 0.2454417603569d+03
xcrref(3) = 0.3293829191851d+03
xcrref(4) = 0.3081924971891d+03
xcrref(5) = 0.4597223799176d+03
c---------------------------------------------------------------------
c Reference values of RMS-norms of solution error.
c---------------------------------------------------------------------
xceref(1) = 0.2598120500183d+00
xceref(2) = 0.2590888922315d-01
xceref(3) = 0.5132886416320d-01
xceref(4) = 0.4806073419454d-01
xceref(5) = 0.5483377491301d+00
c---------------------------------------------------------------------
c reference data for 408X408X408 grids after 500 time steps,
c with DT = 0.3d-03
c---------------------------------------------------------------------
elseif ( (problem_size .eq. 408) .and.
> (problem_size .eq. 408) .and.
> (problem_size .eq. 408) .and.
> (no_time_steps . eq. 500) ) then
class = 'D'
dtref = 0.30d-3
c---------------------------------------------------------------------
c Reference values of RMS-norms of residual.
c---------------------------------------------------------------------
xcrref(1) = 0.1044696216887d+05
xcrref(2) = 0.3204427762578d+04
xcrref(3) = 0.4648680733032d+04
xcrref(4) = 0.4238923283697d+04
xcrref(5) = 0.7588412036136d+04
c---------------------------------------------------------------------
c Reference values of RMS-norms of solution error.
c---------------------------------------------------------------------
xceref(1) = 0.5089471423669d+01
xceref(2) = 0.5323514855894d+00
xceref(3) = 0.1187051008971d+01
xceref(4) = 0.1083734951938d+01
xceref(5) = 0.1164108338568d+02
c---------------------------------------------------------------------
c reference data for 1020X1020X1020 grids after 500 time steps,
c with DT = 0.1d-03
c---------------------------------------------------------------------
elseif ( (problem_size .eq. 1020) .and.
> (problem_size .eq. 1020) .and.
> (problem_size .eq. 1020) .and.
> (no_time_steps . eq. 500) ) then
class = 'E'
dtref = 0.10d-3
c---------------------------------------------------------------------
c Reference values of RMS-norms of residual.
c---------------------------------------------------------------------
xcrref(1) = 0.6255387422609d+05
xcrref(2) = 0.1495317020012d+05
xcrref(3) = 0.2347595750586d+05
xcrref(4) = 0.2091099783534d+05
xcrref(5) = 0.4770412841218d+05
c---------------------------------------------------------------------
c Reference values of RMS-norms of solution error.
c---------------------------------------------------------------------
xceref(1) = 0.6742735164909d+02
xceref(2) = 0.5390656036938d+01
xceref(3) = 0.1680647196477d+02
xceref(4) = 0.1536963126457d+02
xceref(5) = 0.1575330146156d+03
else
verified = .false.
endif
c---------------------------------------------------------------------
c verification test for residuals if gridsize is one of
c the defined grid sizes above (class .ne. 'U')
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c Compute the difference of solution values and the known reference values.
c---------------------------------------------------------------------
do m = 1, 5
xcrdif(m) = dabs((xcr(m)-xcrref(m))/xcrref(m))
xcedif(m) = dabs((xce(m)-xceref(m))/xceref(m))
enddo
c---------------------------------------------------------------------
c Output the comparison of computed results to known cases.
c---------------------------------------------------------------------
if (class .ne. 'U') then
write(*, 1990) class
1990 format(' Verification being performed for class ', a)
write (*,2000) epsilon
2000 format(' accuracy setting for epsilon = ', E20.13)
verified = (dabs(dt-dtref) .le. epsilon)
if (.not.verified) then
class = 'U'
write (*,1000) dtref
1000 format(' DT does not match the reference value of ',
> E15.8)
endif
else
write(*, 1995)
1995 format(' Unknown class')
endif
if (class .ne. 'U') then
write (*, 2001)
else
write (*, 2005)
endif
2001 format(' Comparison of RMS-norms of residual')
2005 format(' RMS-norms of residual')
do m = 1, 5
if (class .eq. 'U') then
write(*, 2015) m, xcr(m)
else if (xcrdif(m) .le. epsilon .and.
& (.not. isnan(xcrdif(m)))) then
write (*,2011) m,xcr(m),xcrref(m),xcrdif(m)
else
verified = .false.
write (*,2010) m,xcr(m),xcrref(m),xcrdif(m)
endif
enddo
if (class .ne. 'U') then
write (*,2002)
else
write (*,2006)
endif
2002 format(' Comparison of RMS-norms of solution error')
2006 format(' RMS-norms of solution error')
do m = 1, 5
if (class .eq. 'U') then
write(*, 2015) m, xce(m)
else if (xcedif(m) .le. epsilon .and.
& (.not. isnan(xcedif(m)))) then
write (*,2011) m,xce(m),xceref(m),xcedif(m)
else
verified = .false.
write (*,2010) m,xce(m),xceref(m),xcedif(m)
endif
enddo
2010 format(' FAILURE: ', i2, E20.13, E20.13, E20.13)
2011 format(' ', i2, E20.13, E20.13, E20.13)
2015 format(' ', i2, E20.13)
if (class .eq. 'U') then
write(*, 2022)
write(*, 2023)
2022 format(' No reference values provided')
2023 format(' No verification performed')
else if (verified) then
write(*, 2020)
2020 format(' Verification Successful')
else
write(*, 2021)
2021 format(' Verification failed')
endif
return
end

View File

@@ -0,0 +1,392 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine x_solve
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c this function performs the solution of the approximate factorization
c step in the x-direction for all five matrix components
c simultaneously. The Thomas algorithm is employed to solve the
c systems for the x-lines. Boundary conditions are non-periodic
c---------------------------------------------------------------------
include 'header.h'
integer i, j, k, i1, i2, m, m1
double precision ru1, fac1, fac2, rhs__(5,0:2),t1,t2
double precision lhs__(5,0:2), lhsm__(5,0:2), lhsp__(5,0:2)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
if (timeron) call timer_start(t_xsolve)
!DVM$ region local(lhs)
!DVM$ parallel (k,j) on u(*,*,j,k)
!DVM$& , CUDA_BLOCK(32,4)
!DVM$& ,private(m,i,ru1,i1,i2,fac1,fac2,lhs__, lhsp__, lhsm__, rhs__,
!DVM$& t1,t2)
do k = 1, nz2
do j = 1, ny2
do i = 0, problem_size-1
if(i .eq. 0) then
lhs__(1,0) = 0.0d0
lhsp__(1,0) = 0.0d0
lhsm__(1,0) = 0.0d0
lhs__(2,0) = 0.0d0
lhsp__(2,0) = 0.0d0
lhsm__(2,0) = 0.0d0
lhs__(3,0) = 1.0d0
lhsp__(3,0) = 1.0d0
lhsm__(3,0) = 1.0d0
lhs__(4,0) = 0.0d0
lhsp__(4,0) = 0.0d0
lhsm__(4,0) = 0.0d0
lhs__(5,0) = 0.0d0
lhsp__(5,0) = 0.0d0
lhsm__(5,0) = 0.0d0
lhs__(1,1) = 0.0d0
ru1 = c3c4*1.0d0/u(1,1-1,j,k)
ru1 = dmax1(dx2+con43*ru1,
> dx5+c1c5*ru1,
> dxmax+ru1,
> dx1)
lhs__(2,1) = - dttx2 * us(1-1,j,k) - dttx1 * ru1
ru1 = c3c4*1.0d0/u(1,1,j,k)
ru1 = dmax1(dx2+con43*ru1,
> dx5+c1c5*ru1,
> dxmax+ru1,
> dx1)
lhs__(3,1) = 1.0d0 + c2dttx1 * ru1
ru1 = c3c4*1.0d0/u(1,1+1,j,k)
ru1 = dmax1(dx2+con43*ru1,
> dx5+c1c5*ru1,
> dxmax+ru1,
> dx1)
lhs__(4,1) = dttx2 * us(1+1,j,k) - dttx1 * ru1
lhs__(5,1) = 0.0d0
lhs__(3,1) = lhs__(3,1) + comz5
lhs__(4,1) = lhs__(4,1) - comz4
lhs__(5,1) = lhs__(5,1) + comz1
lhsp__(1,1) = lhs__(1,1)
lhsp__(2,1) = lhs__(2,1) - dttx2 * speed(1-1,j,k)
lhsp__(3,1) = lhs__(3,1)
lhsp__(4,1) = lhs__(4,1) + dttx2 * speed(1+1,j,k)
lhsp__(5,1) = lhs__(5,1)
lhsm__(1,1) = lhs__(1,1)
lhsm__(2,1) = lhs__(2,1) + dttx2 * speed(1-1,j,k)
lhsm__(3,1) = lhs__(3,1)
lhsm__(4,1) = lhs__(4,1) - dttx2 * speed(1+1,j,k)
lhsm__(5,1) = lhs__(5,1)
endif
if(i + 2 .lt. problem_size-1) then
m = i + 2
lhs__(1,2) = 0.0d0
ru1 = c3c4*1.0d0/u(1,m-1,j,k)
ru1 = dmax1(dx2+con43*ru1,
> dx5+c1c5*ru1,
> dxmax+ru1,
> dx1)
lhs__(2,2) = - dttx2 * us(m-1,j,k) - dttx1 * ru1
ru1 = c3c4*1.0d0/u(1,m,j,k)
ru1 = dmax1(dx2+con43*ru1,
> dx5+c1c5*ru1,
> dxmax+ru1,
> dx1)
lhs__(3,2) = 1.0d0 + c2dttx1 * ru1
ru1 = c3c4*1.0d0/u(1,m+1,j,k)
ru1 = dmax1(dx2+con43*ru1,
> dx5+c1c5*ru1,
> dxmax+ru1,
> dx1)
lhs__(4,2) = dttx2 * us(m+1,j,k) - dttx1 * ru1
lhs__(5,2) = 0.0d0
if(m .eq. 1) then
lhs__(3,2) = lhs__(3,2) + comz5
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .eq. 2) then
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .ge. 3 .and. m .le. nx2-2) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .eq. nx2-1) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
else if(m .eq. nx2) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz5
endif
lhsp__(1,2) = lhs__(1,2)
lhsp__(2,2) = lhs__(2,2) - dttx2 * speed(m-1,j,k)
lhsp__(3,2) = lhs__(3,2)
lhsp__(4,2) = lhs__(4,2) + dttx2 * speed(m+1,j,k)
lhsp__(5,2) = lhs__(5,2)
lhsm__(1,2) = lhs__(1,2)
lhsm__(2,2) = lhs__(2,2) + dttx2 * speed(m-1,j,k)
lhsm__(3,2) = lhs__(3,2)
lhsm__(4,2) = lhs__(4,2) - dttx2 * speed(m+1,j,k)
lhsm__(5,2) = lhs__(5,2)
else if(i + 2 .eq. nx2+1) then
lhs__(1,2) = 0.0d0
lhsp__(1,2) = 0.0d0
lhsm__(1,2) = 0.0d0
lhs__(2,2) = 0.0d0
lhsp__(2,2) = 0.0d0
lhsm__(2,2) = 0.0d0
lhs__(3,2) = 1.0d0
lhsp__(3,2) = 1.0d0
lhsm__(3,2) = 1.0d0
lhs__(4,2) = 0.0d0
lhsp__(4,2) = 0.0d0
lhsm__(4,2) = 0.0d0
lhs__(5,2) = 0.0d0
lhsp__(5,2) = 0.0d0
lhsm__(5,2) = 0.0d0
endif
!********************************** end of init
i1 = i + 1
i2 = i + 2
fac1 = 1.d0/lhs__(3,0)
lhs__(4,0) = fac1*lhs__(4,0)
lhs__(5,0) = fac1*lhs__(5,0)
do m = 1, 3
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
end do
if(i .le. nx2-1) then
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
lhs__(2,2)=lhs__(2,2)-lhs__(1,2)*lhs__(4,0)
lhs__(3,2)=lhs__(3,2)-lhs__(1,2)*lhs__(5,0)
do m = 1, 3
rhs(m,i1,j,k)=rhs(m,i1,j,k)-lhs__(2,1)*rhs(m,i,j,k)
rhs(m,i2,j,k)=rhs(m,i2,j,k)-lhs__(1,2)*rhs(m,i,j,k)
end do
else
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
fac2 = 1.d0/lhs__(3,1)
do m = 1, 3
rhs(m,i1,j,k) = rhs(m,i1,j,k)-lhs__(2,1)*rhs(m,i,j,k)
rhs(m,i1,j,k) = fac2*rhs(m,i1,j,k)
end do
endif
m = 4
fac1 = 1.d0/lhsp__(3,0)
lhsp__(4,0) = fac1*lhsp__(4,0)
lhsp__(5,0) = fac1*lhsp__(5,0)
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
lhsp__(3,1)=lhsp__(3,1)-lhsp__(2,1)*lhsp__(4,0)
lhsp__(4,1)=lhsp__(4,1)-lhsp__(2,1)*lhsp__(5,0)
rhs(m,i1,j,k) = rhs(m,i1,j,k)-lhsp__(2,1)*rhs(m,i,j,k)
if(i .lt. nx2) then
lhsp__(2,2)=lhsp__(2,2)-lhsp__(1,2)*lhsp__(4,0)
lhsp__(3,2)=lhsp__(3,2)-lhsp__(1,2)*lhsp__(5,0)
rhs(m,i2,j,k)=rhs(m,i2,j,k)-lhsp__(1,2)*rhs(m,i,j,k)
endif
m = 5
fac1 = 1.d0/lhsm__(3,0)
lhsm__(4,0) = fac1*lhsm__(4,0)
lhsm__(5,0) = fac1*lhsm__(5,0)
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
lhsm__(3,1)=lhsm__(3,1)-lhsm__(2,1)*lhsm__(4,0)
lhsm__(4,1)=lhsm__(4,1)-lhsm__(2,1)*lhsm__(5,0)
rhs(m,i1,j,k)=rhs(m,i1,j,k) -lhsm__(2,1)*rhs(m,i,j,k)
if(i .lt. nx2) then
lhsm__(2,2)=lhsm__(2,2)-lhsm__(1,2)*lhsm__(4,0)
lhsm__(3,2)=lhsm__(3,2)-lhsm__(1,2)*lhsm__(5,0)
rhs(m,i2,j,k)=rhs(m,i2,j,k) -lhsm__(1,2)*rhs(m,i,j,k)
endif
if(i .eq. nx2) then
rhs(4,i1,j,k) = rhs(4,i1,j,k)/lhsp__(3,1)
rhs(5,i1,j,k) = rhs(5,i1,j,k)/lhsm__(3,1)
do m = 1, 3
rhs(m,i,j,k) = rhs(m,i,j,k)-lhs__(4,0)*rhs(m,i1,j,k)
end do
rhs(4,i,j,k) = rhs(4,i,j,k)-lhsp__(4,0)*rhs(4,i1,j,k)
rhs(5,i,j,k) = rhs(5,i,j,k)-lhsm__(4,0)*rhs(5,i1,j,k)
endif
lhs(0,4,i,j,k) = lhs__(4,0)
lhs(1,4,i,j,k) = lhsp__(4,0)
lhs(2,4,i,j,k) = lhsm__(4,0)
lhs(0,5,i,j,k) = lhs__(5,0)
lhs(1,5,i,j,k) = lhsp__(5,0)
lhs(2,5,i,j,k) = lhsm__(5,0)
lhs__(1,0) = lhs__(1,1)
lhsp__(1,0) = lhsp__(1,1)
lhsm__(1,0) = lhsm__(1,1)
lhs__(1,1) = lhs__(1,2)
lhsp__(1,1) = lhsp__(1,2)
lhsm__(1,1) = lhsm__(1,2)
lhs__(2,0) = lhs__(2,1)
lhsp__(2,0) = lhsp__(2,1)
lhsm__(2,0) = lhsm__(2,1)
lhs__(2,1) = lhs__(2,2)
lhsp__(2,1) = lhsp__(2,2)
lhsm__(2,1) = lhsm__(2,2)
lhs__(3,0) = lhs__(3,1)
lhsp__(3,0) = lhsp__(3,1)
lhsm__(3,0) = lhsm__(3,1)
lhs__(3,1) = lhs__(3,2)
lhsp__(3,1) = lhsp__(3,2)
lhsm__(3,1) = lhsm__(3,2)
lhs__(4,0) = lhs__(4,1)
lhsp__(4,0) = lhsp__(4,1)
lhsm__(4,0) = lhsm__(4,1)
lhs__(4,1) = lhs__(4,2)
lhsp__(4,1) = lhsp__(4,2)
lhsm__(4,1) = lhsm__(4,2)
lhs__(5,0) = lhs__(5,1)
lhsp__(5,0) = lhsp__(5,1)
lhsm__(5,0) = lhsm__(5,1)
lhs__(5,1) = lhs__(5,2)
lhsp__(5,1) = lhsp__(5,2)
lhsm__(5,1) = lhsm__(5,2)
enddo
i = problem_size-3
rhs__(1,2) = rhs(1,i+2,j,k)
rhs__(2,2) = rhs(2,i+2,j,k)
rhs__(3,2) = rhs(3,i+2,j,k)
rhs__(4,2) = rhs(4,i+2,j,k)
rhs__(5,2) = rhs(5,i+2,j,k)
rhs__(1,1) = rhs(1,i+1,j,k)
rhs__(2,1) = rhs(2,i+1,j,k)
rhs__(3,1) = rhs(3,i+1,j,k)
rhs__(4,1) = rhs(4,i+1,j,k)
rhs__(5,1) = rhs(5,i+1,j,k)
rhs__(1,0) = rhs(1,i,j,k)
rhs__(2,0) = rhs(2,i,j,k)
rhs__(3,0) = rhs(3,i,j,k)
rhs__(4,0) = rhs(4,i,j,k)
rhs__(5,0) = rhs(5,i,j,k)
rhs__(1,0) = rhs__(1,0) -
> lhs(0,4,i,j,k)*rhs__(1,1) -
> lhs(0,5,i,j,k)*rhs__(1,2)
rhs__(2,0) = rhs__(2,0) -
> lhs(0,4,i,j,k)*rhs__(2,1) -
> lhs(0,5,i,j,k)*rhs__(2,2)
rhs__(3,0) = rhs__(3,0) -
> lhs(0,4,i,j,k)*rhs__(3,1) -
> lhs(0,5,i,j,k)*rhs__(3,2)
rhs__(4,0) = rhs__(4,0) -
> lhs(1,4,i,j,k)*rhs__(4,1) -
> lhs(1,5,i,j,k)*rhs__(4,2)
rhs__(5,0) = rhs__(5,0) -
> lhs(2,4,i,j,k)*rhs__(5,1) -
> lhs(2,5,i,j,k)*rhs__(5,2)
rhs__(1,2) = rhs__(1,1)
rhs__(2,2) = rhs__(2,1)
rhs__(3,2) = rhs__(3,1)
rhs__(4,2) = rhs__(4,1)
rhs__(5,2) = rhs__(5,1)
rhs__(1,1) = rhs__(1,0)
rhs__(2,1) = rhs__(2,0)
rhs__(3,1) = rhs__(3,0)
rhs__(4,1) = rhs__(4,0)
rhs__(5,1) = rhs__(5,0)
do i = problem_size-4, 0, -1
rhs__(1,0) = rhs(1,i,j,k)
rhs__(2,0) = rhs(2,i,j,k)
rhs__(3,0) = rhs(3,i,j,k)
rhs__(4,0) = rhs(4,i,j,k)
rhs__(5,0) = rhs(5,i,j,k)
rhs__(1,0) = rhs__(1,0) -
> lhs(0,4,i,j,k)*rhs__(1,1) -
> lhs(0,5,i,j,k)*rhs__(1,2)
rhs__(2,0) = rhs__(2,0) -
> lhs(0,4,i,j,k)*rhs__(2,1) -
> lhs(0,5,i,j,k)*rhs__(2,2)
rhs__(3,0) = rhs__(3,0) -
> lhs(0,4,i,j,k)*rhs__(3,1) -
> lhs(0,5,i,j,k)*rhs__(3,2)
rhs__(4,0) = rhs__(4,0) -
> lhs(1,4,i,j,k)*rhs__(4,1) -
> lhs(1,5,i,j,k)*rhs__(4,2)
rhs__(5,0) = rhs__(5,0) -
> lhs(2,4,i,j,k)*rhs__(5,1) -
> lhs(2,5,i,j,k)*rhs__(5,2)
t1 = bt * rhs__(3,2)
t2 = 0.5d0 * ( rhs__(4,2)+rhs__(5,2))
rhs(1,i+2,j,k) = -rhs__(2,2)
rhs(2,i+2,j,k) = rhs__(1,2)
rhs(3,i+2,j,k) = bt * (rhs__(4,2)-rhs__(5,2))
rhs(4,i+2,j,k) = -t1 + t2
rhs(5,i+2,j,k) = t1 + t2
rhs__(1,2) = rhs__(1,1)
rhs__(2,2) = rhs__(2,1)
rhs__(3,2) = rhs__(3,1)
rhs__(4,2) = rhs__(4,1)
rhs__(5,2) = rhs__(5,1)
rhs__(1,1) = rhs__(1,0)
rhs__(2,1) = rhs__(2,0)
rhs__(3,1) = rhs__(3,0)
rhs__(4,1) = rhs__(4,0)
rhs__(5,1) = rhs__(5,0)
end do
t1 = bt * rhs__(3,2)
t2 = 0.5d0 * ( rhs__(4,2)+rhs__(5,2))
rhs(1,1,j,k) = -rhs__(2,2)
rhs(2,1,j,k) = rhs__(1,2)
rhs(3,1,j,k) = bt * (rhs__(4,2)-rhs__(5,2))
rhs(4,1,j,k) = -t1 + t2
rhs(5,1,j,k) = t1 + t2
enddo
enddo
!DVM$ end region
if (timeron) call timer_stop(t_xsolve)
return
end

View File

@@ -0,0 +1,321 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine x_solve
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c this function performs the solution of the approximate factorization
c step in the x-direction for all five matrix components
c simultaneously. The Thomas algorithm is employed to solve the
c systems for the x-lines. Boundary conditions are non-periodic
c---------------------------------------------------------------------
include 'header.h'
integer i, j, k, i1, i2, m
double precision ru1, fac1, fac2, t1,t2,t3
double precision lhs__(5,0:2), lhsm__(5,0:2), lhsp__(5,0:2)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
if (timeron) call timer_start(t_xsolve)
!DVM$ region local(lhs)
!DVM$ parallel (k,j,i) on rhs(*,i,j,k)
!DVM$& ,private(m,ru1,i1,i2,fac1,fac2,lhs__, lhsp__, lhsm__)
!DVM$& ,ACROSS(OUT:rhs(0:0,0:2,0:0,0:0), lhs(0:0,0:0,0:2,0:0,0:0))
!DVM$& ,stage(stage_n)
do k = 1, nz2
do j = 1, ny2
do i = 0, problem_size-1
if(i .eq. 0) then
lhs__(1,0) = 0.0d0
lhsp__(1,0) = 0.0d0
lhsm__(1,0) = 0.0d0
lhs__(2,0) = 0.0d0
lhsp__(2,0) = 0.0d0
lhsm__(2,0) = 0.0d0
lhs__(3,0) = 1.0d0
lhsp__(3,0) = 1.0d0
lhsm__(3,0) = 1.0d0
lhs__(4,0) = 0.0d0
lhsp__(4,0) = 0.0d0
lhsm__(4,0) = 0.0d0
lhs__(5,0) = 0.0d0
lhsp__(5,0) = 0.0d0
lhsm__(5,0) = 0.0d0
lhs__(1,1) = 0.0d0
ru1 = c3c4*1.0d0/u(1,i,j,k)
ru1 = dmax1(dx2+con43*ru1,
> dx5+c1c5*ru1,
> dxmax+ru1,
> dx1)
lhs__(2,1) = - dttx2 * us(i,j,k) - dttx1 * ru1
ru1 = c3c4*1.0d0/u(1,i+1,j,k)
ru1 = dmax1(dx2+con43*ru1,
> dx5+c1c5*ru1,
> dxmax+ru1,
> dx1)
lhs__(3,1) = 1.0d0 + c2dttx1 * ru1
ru1 = c3c4*1.0d0/u(1,i+2,j,k)
ru1 = dmax1(dx2+con43*ru1,
> dx5+c1c5*ru1,
> dxmax+ru1,
> dx1)
lhs__(4,1) = dttx2 * us(i+2,j,k) - dttx1 * ru1
lhs__(5,1) = 0.0d0
lhs__(3,1) = lhs__(3,1) + comz5
lhs__(4,1) = lhs__(4,1) - comz4
lhs__(5,1) = lhs__(5,1) + comz1
lhsp__(1,1) = lhs__(1,1)
lhsp__(2,1) = lhs__(2,1) - dttx2 * speed(i,j,k)
lhsp__(3,1) = lhs__(3,1)
lhsp__(4,1) = lhs__(4,1) + dttx2 * speed(i+2,j,k)
lhsp__(5,1) = lhs__(5,1)
lhsm__(1,1) = lhs__(1,1)
lhsm__(2,1) = lhs__(2,1) + dttx2 * speed(i,j,k)
lhsm__(3,1) = lhs__(3,1)
lhsm__(4,1) = lhs__(4,1) - dttx2 * speed(i+2,j,k)
lhsm__(5,1) = lhs__(5,1)
else
do m = 1, 5
lhs__(m,0) = lhs(0,m,i,j,k)
lhsp__(m,0) = lhs(1,m,i,j,k)
lhsm__(m,0) = lhs(2,m,i,j,k)
lhs__(m,1) = lhs(0,m,i+1,j,k)
lhsp__(m,1) = lhs(1,m,i+1,j,k)
lhsm__(m,1) = lhs(2,m,i+1,j,k)
enddo
endif
if(i + 2 .lt. problem_size-1) then
m = i + 2
lhs__(1,2) = 0.0d0
ru1 = c3c4*1.0d0/u(1,m-1,j,k)
ru1 = dmax1(dx2+con43*ru1,
> dx5+c1c5*ru1,
> dxmax+ru1,
> dx1)
lhs__(2,2) = - dttx2 * us(m-1,j,k) - dttx1 * ru1
ru1 = c3c4*1.0d0/u(1,m,j,k)
ru1 = dmax1(dx2+con43*ru1,
> dx5+c1c5*ru1,
> dxmax+ru1,
> dx1)
lhs__(3,2) = 1.0d0 + c2dttx1 * ru1
ru1 = c3c4*1.0d0/u(1,m+1,j,k)
ru1 = dmax1(dx2+con43*ru1,
> dx5+c1c5*ru1,
> dxmax+ru1,
> dx1)
lhs__(4,2) = dttx2 * us(m+1,j,k) - dttx1 * ru1
lhs__(5,2) = 0.0d0
if(m .eq. 1) then
lhs__(3,2) = lhs__(3,2) + comz5
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .eq. 2) then
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .ge. 3 .and. m .le. nx2-2) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .eq. nx2-1) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
else if(m .eq. nx2) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz5
endif
lhsp__(1,2) = lhs__(1,2)
lhsp__(2,2) = lhs__(2,2) - dttx2 * speed(m-1,j,k)
lhsp__(3,2) = lhs__(3,2)
lhsp__(4,2) = lhs__(4,2) + dttx2 * speed(m+1,j,k)
lhsp__(5,2) = lhs__(5,2)
lhsm__(1,2) = lhs__(1,2)
lhsm__(2,2) = lhs__(2,2) + dttx2 * speed(m-1,j,k)
lhsm__(3,2) = lhs__(3,2)
lhsm__(4,2) = lhs__(4,2) - dttx2 * speed(m+1,j,k)
lhsm__(5,2) = lhs__(5,2)
else if(i + 2 .eq. nx2+1) then
lhs__(1,2) = 0.0d0
lhsp__(1,2) = 0.0d0
lhsm__(1,2) = 0.0d0
lhs__(2,2) = 0.0d0
lhsp__(2,2) = 0.0d0
lhsm__(2,2) = 0.0d0
lhs__(3,2) = 1.0d0
lhsp__(3,2) = 1.0d0
lhsm__(3,2) = 1.0d0
lhs__(4,2) = 0.0d0
lhsp__(4,2) = 0.0d0
lhsm__(4,2) = 0.0d0
lhs__(5,2) = 0.0d0
lhsp__(5,2) = 0.0d0
lhsm__(5,2) = 0.0d0
endif
!********************************** end of init
i1 = i + 1
i2 = i + 2
fac1 = 1.d0/lhs__(3,0)
lhs__(4,0) = fac1*lhs__(4,0)
lhs__(5,0) = fac1*lhs__(5,0)
do m = 1, 3
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
end do
if(i .le. nx2-1) then
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
lhs__(2,2)=lhs__(2,2)-lhs__(1,2)*lhs__(4,0)
lhs__(3,2)=lhs__(3,2)-lhs__(1,2)*lhs__(5,0)
do m = 1, 3
rhs(m,i1,j,k)=rhs(m,i1,j,k)-lhs__(2,1)*rhs(m,i,j,k)
rhs(m,i2,j,k)=rhs(m,i2,j,k)-lhs__(1,2)*rhs(m,i,j,k)
end do
else
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
if (lhs__(3,1) .ne. 0) then
fac2 = 1.d0/lhs__(3,1)
else
fac2 = 0
endif
do m = 1, 3
rhs(m,i1,j,k) = rhs(m,i1,j,k)-lhs__(2,1)*rhs(m,i,j,k)
rhs(m,i1,j,k) = fac2*rhs(m,i1,j,k)
end do
endif
m = 4
fac1 = 1.d0/lhsp__(3,0)
lhsp__(4,0) = fac1*lhsp__(4,0)
lhsp__(5,0) = fac1*lhsp__(5,0)
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
lhsp__(3,1)=lhsp__(3,1)-lhsp__(2,1)*lhsp__(4,0)
lhsp__(4,1)=lhsp__(4,1)-lhsp__(2,1)*lhsp__(5,0)
rhs(m,i1,j,k) = rhs(m,i1,j,k)-lhsp__(2,1)*rhs(m,i,j,k)
if(i .lt. nx2) then
lhsp__(2,2)=lhsp__(2,2)-lhsp__(1,2)*lhsp__(4,0)
lhsp__(3,2)=lhsp__(3,2)-lhsp__(1,2)*lhsp__(5,0)
rhs(m,i2,j,k)=rhs(m,i2,j,k)-lhsp__(1,2)*rhs(m,i,j,k)
endif
m = 5
fac1 = 1.d0/lhsm__(3,0)
lhsm__(4,0) = fac1*lhsm__(4,0)
lhsm__(5,0) = fac1*lhsm__(5,0)
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
lhsm__(3,1)=lhsm__(3,1)-lhsm__(2,1)*lhsm__(4,0)
lhsm__(4,1)=lhsm__(4,1)-lhsm__(2,1)*lhsm__(5,0)
rhs(m,i1,j,k)=rhs(m,i1,j,k) -lhsm__(2,1)*rhs(m,i,j,k)
if(i .lt. nx2) then
lhsm__(2,2)=lhsm__(2,2)-lhsm__(1,2)*lhsm__(4,0)
lhsm__(3,2)=lhsm__(3,2)-lhsm__(1,2)*lhsm__(5,0)
rhs(m,i2,j,k)=rhs(m,i2,j,k) -lhsm__(1,2)*rhs(m,i,j,k)
endif
if(i .eq. nx2) then
rhs(4,i1,j,k) = rhs(4,i1,j,k)/lhsp__(3,1)
rhs(5,i1,j,k) = rhs(5,i1,j,k)/lhsm__(3,1)
do m = 1, 3
rhs(m,i,j,k) = rhs(m,i,j,k)-lhs__(4,0)*rhs(m,i1,j,k)
end do
rhs(4,i,j,k) = rhs(4,i,j,k)-lhsp__(4,0)*rhs(4,i1,j,k)
rhs(5,i,j,k) = rhs(5,i,j,k)-lhsm__(4,0)*rhs(5,i1,j,k)
endif
do m = 1, 5
lhs(0,m,i,j,k) = lhs__(m,0)
lhs(1,m,i,j,k) = lhsp__(m,0)
lhs(2,m,i,j,k) = lhsm__(m,0)
lhs(0,m,i+1,j,k) = lhs__(m,1)
lhs(1,m,i+1,j,k) = lhsp__(m,1)
lhs(2,m,i+1,j,k) = lhsm__(m,1)
if (i .lt. nx2) then
lhs(0,m,i+2,j,k) = lhs__(m,2)
lhs(1,m,i+2,j,k) = lhsp__(m,2)
lhs(2,m,i+2,j,k) = lhsm__(m,2)
endif
enddo
enddo
enddo
enddo
!DVM$ PARALLEL (k,j,i) on rhs(*,i,j,k),ACROSS(rhs(0:0,0:2,0:0,0:0))
!DVM$& ,stage(stage_n)
do k = 1, nz2
do j = 1, ny2
do i = problem_size-3, 0, -1
rhs(1,i,j,k) = rhs(1,i,j,k) -
& lhs(0,4,i,j,k)*rhs(1,i+1,j,k) -
& lhs(0,5,i,j,k)*rhs(1,i+2,j,k)
rhs(2,i,j,k) = rhs(2,i,j,k) -
& lhs(0,4,i,j,k)*rhs(2,i+1,j,k) -
& lhs(0,5,i,j,k)*rhs(2,i+2,j,k)
rhs(3,i,j,k) = rhs(3,i,j,k) -
& lhs(0,4,i,j,k)*rhs(3,i+1,j,k) -
& lhs(0,5,i,j,k)*rhs(3,i+2,j,k)
rhs(4,i,j,k) = rhs(4,i,j,k) -
& lhs(1,4,i,j,k)*rhs(4,i+1,j,k) -
& lhs(1,5,i,j,k)*rhs(4,i+2,j,k)
rhs(5,i,j,k) = rhs(5,i,j,k) -
& lhs(2,4,i,j,k)*rhs(5,i+1,j,k) -
& lhs(2,5,i,j,k)*rhs(5,i+2,j,k)
end do
enddo
enddo
!DVM$ PARALLEL (k,j,i) on rhs(*,i,j,k),PRIVATE(t1,t2,t3)
do k = 1, nz2
do j = 1, ny2
do i = 1, nx2
t1 = bt * rhs(3,i,j,k)
t2 = 0.5d0 * (rhs(4,i,j,k)+rhs(5,i,j,k))
t3 = rhs(1,i,j,k)
rhs(1,i,j,k) = -rhs(2,i,j,k)
rhs(2,i,j,k) = t3
rhs(3,i,j,k) = bt * (rhs(4,i,j,k)-rhs(5,i,j,k))
rhs(4,i,j,k) = -t1 + t2
rhs(5,i,j,k) = t1 + t2
end do
enddo
enddo
!DVM$ end region
if (timeron) call timer_stop(t_xsolve)
return
end

View File

@@ -0,0 +1,396 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine y_solve
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c this function performs the solution of the approximate factorization
c step in the y-direction for all five matrix components
c simultaneously. The Thomas algorithm is employed to solve the
c systems for the y-lines. Boundary conditions are non-periodic
c---------------------------------------------------------------------
include 'header.h'
integer i, j, k, j1, j2, m, m1
double precision ru1, fac1, fac2, rhs__(5,0:2),t1,t2
double precision lhs__(5,0:2), lhsm__(5,0:2), lhsp__(5,0:2)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
if (timeron) call timer_start(t_ysolve)
!DVM$ region local(lhs)
!DVM$ parallel (k,i) on u(*,i,*,k)
!DVM$& , CUDA_BLOCK(32,4)
!DVM$& ,private(m,j1,j2,fac1,fac2,ru1,lhs__,lhsp__,lhsm__,j,rhs__,
!DVM$& t1,t2)
do k = 1, nz2
do i = 1, nx2
lhs__(1,0) = 0.0d0
lhsp__(1,0) = 0.0d0
lhsm__(1,0) = 0.0d0
lhs__(2,0) = 0.0d0
lhsp__(2,0) = 0.0d0
lhsm__(2,0) = 0.0d0
lhs__(3,0) = 1.0d0
lhsp__(3,0) = 1.0d0
lhsm__(3,0) = 1.0d0
lhs__(4,0) = 0.0d0
lhsp__(4,0) = 0.0d0
lhsm__(4,0) = 0.0d0
lhs__(5,0) = 0.0d0
lhsp__(5,0) = 0.0d0
lhsm__(5,0) = 0.0d0
lhs__(1,1) = 0.0d0
ru1 = c3c4*1.0d0/u(1,i,1-1,k)
ru1 = dmax1( dy3 + con43 * ru1,
> dy5 + c1c5*ru1,
> dymax + ru1,
> dy1)
lhs__(2,1) = - dtty2 * vs(i,1-1,k) - dtty1 * ru1
ru1 = c3c4*1.0d0/u(1,i,1,k)
ru1 = dmax1( dy3 + con43 * ru1,
> dy5 + c1c5*ru1,
> dymax + ru1,
> dy1)
lhs__(3,1) = 1.0d0 + c2dtty1 * ru1
ru1 = c3c4*1.0d0/u(1,i,1+1,k)
ru1 = dmax1( dy3 + con43 * ru1,
> dy5 + c1c5*ru1,
> dymax + ru1,
> dy1)
lhs__(4,1) = dtty2 * vs(i,1+1,k) - dtty1 * ru1
lhs__(5,1) = 0.0d0
lhs__(3,1) = lhs__(3,1) + comz5
lhs__(4,1) = lhs__(4,1) - comz4
lhs__(5,1) = lhs__(5,1) + comz1
lhsp__(1,1) = lhs__(1,1)
lhsp__(2,1) = lhs__(2,1) - dtty2 * speed(i,1-1,k)
lhsp__(3,1) = lhs__(3,1)
lhsp__(4,1) = lhs__(4,1) + dtty2 * speed(i,1+1,k)
lhsp__(5,1) = lhs__(5,1)
lhsm__(1,1) = lhs__(1,1)
lhsm__(2,1) = lhs__(2,1) + dtty2 * speed(i,1-1,k)
lhsm__(3,1) = lhs__(3,1)
lhsm__(4,1) = lhs__(4,1) - dtty2 * speed(i,1+1,k)
lhsm__(5,1) = lhs__(5,1)
do j = 0, ny2+1
if(j + 2 .lt. ny2 + 1) then
m = j + 2
lhs__(1,2) = 0.0d0
ru1 = c3c4*1.0d0/u(1,i,m-1,k)
ru1 = dmax1( dy3 + con43 * ru1,
> dy5 + c1c5*ru1,
> dymax + ru1,
> dy1)
lhs__(2,2) = - dtty2 * vs(i,m-1,k) - dtty1 * ru1
ru1 = c3c4*1.0d0/u(1,i,m,k)
ru1 = dmax1( dy3 + con43 * ru1,
> dy5 + c1c5*ru1,
> dymax + ru1,
> dy1)
lhs__(3,2) = 1.0d0 + c2dtty1 * ru1
ru1 = c3c4*1.0d0/u(1,i,m+1,k)
ru1 = dmax1( dy3 + con43 * ru1,
> dy5 + c1c5*ru1,
> dymax + ru1,
> dy1)
lhs__(4,2) = dtty2 * vs(i,m+1,k) - dtty1 * ru1
lhs__(5,2) = 0.0d0
if(m .eq. 1) then
lhs__(3,2) = lhs__(3,2) + comz5
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .eq. 2) then
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .ge. 3 .and. m .le. ny2-2) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .eq. ny2-1) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
else if(m .eq. ny2) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz5
endif
lhsp__(1,2) = lhs__(1,2)
lhsp__(2,2) = lhs__(2,2) - dtty2 * speed(i,m-1,k)
lhsp__(3,2) = lhs__(3,2)
lhsp__(4,2) = lhs__(4,2) + dtty2 * speed(i,m+1,k)
lhsp__(5,2) = lhs__(5,2)
lhsm__(1,2) = lhs__(1,2)
lhsm__(2,2) = lhs__(2,2) + dtty2 * speed(i,m-1,k)
lhsm__(3,2) = lhs__(3,2)
lhsm__(4,2) = lhs__(4,2) - dtty2 * speed(i,m+1,k)
lhsm__(5,2) = lhs__(5,2)
else if(j + 2 .eq. ny2+1) then
lhs__(1,2) = 0.0d0
lhsp__(1,2) = 0.0d0
lhsm__(1,2) = 0.0d0
lhs__(2,2) = 0.0d0
lhsp__(2,2) = 0.0d0
lhsm__(2,2) = 0.0d0
lhs__(3,2) = 1.0d0
lhsp__(3,2) = 1.0d0
lhsm__(3,2) = 1.0d0
lhs__(4,2) = 0.0d0
lhsp__(4,2) = 0.0d0
lhsm__(4,2) = 0.0d0
lhs__(5,2) = 0.0d0
lhsp__(5,2) = 0.0d0
lhsm__(5,2) = 0.0d0
endif
!********************************** end of init
j1 = j + 1
j2 = j + 2
fac1 = 1.d0/lhs__(3,0)
lhs__(4,0) = fac1*lhs__(4,0)
lhs__(5,0) = fac1*lhs__(5,0)
do m = 1, 3
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
end do
if(j .le. ny2-1) then
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
lhs__(2,2)=lhs__(2,2)-lhs__(1,2)*lhs__(4,0)
lhs__(3,2)=lhs__(3,2)-lhs__(1,2)*lhs__(5,0)
do m = 1, 3
rhs(m,i,j1,k)=rhs(m,i,j1,k)-lhs__(2,1)*rhs(m,i,j,k)
rhs(m,i,j2,k)=rhs(m,i,j2,k)-lhs__(1,2)*rhs(m,i,j,k)
end do
else
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
fac2 = 1.d0/lhs__(3,1)
do m = 1, 3
rhs(m,i,j1,k) = rhs(m,i,j1,k)-lhs__(2,1)*rhs(m,i,j,k)
rhs(m,i,j1,k) = fac2*rhs(m,i,j1,k)
end do
endif
m = 4
fac1 = 1.d0/lhsp__(3,0)
lhsp__(4,0) = fac1*lhsp__(4,0)
lhsp__(5,0) = fac1*lhsp__(5,0)
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
lhsp__(3,1)=lhsp__(3,1)-lhsp__(2,1)*lhsp__(4,0)
lhsp__(4,1)=lhsp__(4,1)-lhsp__(2,1)*lhsp__(5,0)
rhs(m,i,j1,k) = rhs(m,i,j1,k)-lhsp__(2,1)*rhs(m,i,j,k)
if(j .lt. ny2) then
lhsp__(2,2)=lhsp__(2,2)-lhsp__(1,2)*lhsp__(4,0)
lhsp__(3,2)=lhsp__(3,2)-lhsp__(1,2)*lhsp__(5,0)
rhs(m,i,j2,k)=rhs(m,i,j2,k)-lhsp__(1,2)*rhs(m,i,j,k)
endif
m = 5
fac1 = 1.d0/lhsm__(3,0)
lhsm__(4,0) = fac1*lhsm__(4,0)
lhsm__(5,0) = fac1*lhsm__(5,0)
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
lhsm__(3,1)=lhsm__(3,1)-lhsm__(2,1)*lhsm__(4,0)
lhsm__(4,1)=lhsm__(4,1)-lhsm__(2,1)*lhsm__(5,0)
rhs(m,i,j1,k)=rhs(m,i,j1,k) -lhsm__(2,1)*rhs(m,i,j,k)
if(j .lt. ny2) then
lhsm__(2,2)=lhsm__(2,2)-lhsm__(1,2)*lhsm__(4,0)
lhsm__(3,2)=lhsm__(3,2)-lhsm__(1,2)*lhsm__(5,0)
rhs(m,i,j2,k)=rhs(m,i,j2,k) -lhsm__(1,2)*rhs(m,i,j,k)
endif
if(j .eq. ny2) then
rhs(4,i,j1,k) = rhs(4,i,j1,k)/lhsp__(3,1)
rhs(5,i,j1,k) = rhs(5,i,j1,k)/lhsm__(3,1)
do m = 1, 3
rhs(m,i,j,k) = rhs(m,i,j,k)-lhs__(4,0)*rhs(m,i,j1,k)
end do
rhs(4,i,j,k) = rhs(4,i,j,k)-lhsp__(4,0)*rhs(4,i,j1,k)
rhs(5,i,j,k) = rhs(5,i,j,k)-lhsm__(4,0)*rhs(5,i,j1,k)
endif
lhs(0,4,i,j,k) = lhs__(4,0)
lhs(1,4,i,j,k) = lhsp__(4,0)
lhs(2,4,i,j,k) = lhsm__(4,0)
lhs(0,5,i,j,k) = lhs__(5,0)
lhs(1,5,i,j,k) = lhsp__(5,0)
lhs(2,5,i,j,k) = lhsm__(5,0)
lhs__(1,0) = lhs__(1,1)
lhsp__(1,0) = lhsp__(1,1)
lhsm__(1,0) = lhsm__(1,1)
lhs__(1,1) = lhs__(1,2)
lhsp__(1,1) = lhsp__(1,2)
lhsm__(1,1) = lhsm__(1,2)
lhs__(2,0) = lhs__(2,1)
lhsp__(2,0) = lhsp__(2,1)
lhsm__(2,0) = lhsm__(2,1)
lhs__(2,1) = lhs__(2,2)
lhsp__(2,1) = lhsp__(2,2)
lhsm__(2,1) = lhsm__(2,2)
lhs__(3,0) = lhs__(3,1)
lhsp__(3,0) = lhsp__(3,1)
lhsm__(3,0) = lhsm__(3,1)
lhs__(3,1) = lhs__(3,2)
lhsp__(3,1) = lhsp__(3,2)
lhsm__(3,1) = lhsm__(3,2)
lhs__(4,0) = lhs__(4,1)
lhsp__(4,0) = lhsp__(4,1)
lhsm__(4,0) = lhsm__(4,1)
lhs__(4,1) = lhs__(4,2)
lhsp__(4,1) = lhsp__(4,2)
lhsm__(4,1) = lhsm__(4,2)
lhs__(5,0) = lhs__(5,1)
lhsp__(5,0) = lhsp__(5,1)
lhsm__(5,0) = lhsm__(5,1)
lhs__(5,1) = lhs__(5,2)
lhsp__(5,1) = lhsp__(5,2)
lhsm__(5,1) = lhsm__(5,2)
enddo
j = problem_size-3
rhs__(1,2) = rhs(1,i,j+2,k)
rhs__(2,2) = rhs(2,i,j+2,k)
rhs__(3,2) = rhs(3,i,j+2,k)
rhs__(4,2) = rhs(4,i,j+2,k)
rhs__(5,2) = rhs(5,i,j+2,k)
rhs__(1,1) = rhs(1,i,j+1,k)
rhs__(2,1) = rhs(2,i,j+1,k)
rhs__(3,1) = rhs(3,i,j+1,k)
rhs__(4,1) = rhs(4,i,j+1,k)
rhs__(5,1) = rhs(5,i,j+1,k)
rhs__(1,0) = rhs(1,i,j,k)
rhs__(2,0) = rhs(2,i,j,k)
rhs__(3,0) = rhs(3,i,j,k)
rhs__(4,0) = rhs(4,i,j,k)
rhs__(5,0) = rhs(5,i,j,k)
rhs__(1,0) = rhs__(1,0) -
> lhs(0,4,i,j,k)*rhs__(1,1) -
> lhs(0,5,i,j,k)*rhs__(1,2)
rhs__(2,0) = rhs__(2,0) -
> lhs(0,4,i,j,k)*rhs__(2,1) -
> lhs(0,5,i,j,k)*rhs__(2,2)
rhs__(3,0) = rhs__(3,0) -
> lhs(0,4,i,j,k)*rhs__(3,1) -
> lhs(0,5,i,j,k)*rhs__(3,2)
rhs__(4,0) = rhs__(4,0) -
> lhs(1,4,i,j,k)*rhs__(4,1) -
> lhs(1,5,i,j,k)*rhs__(4,2)
rhs__(5,0) = rhs__(5,0) -
> lhs(2,4,i,j,k)*rhs__(5,1) -
> lhs(2,5,i,j,k)*rhs__(5,2)
rhs__(1,2) = rhs__(1,1)
rhs__(2,2) = rhs__(2,1)
rhs__(3,2) = rhs__(3,1)
rhs__(4,2) = rhs__(4,1)
rhs__(5,2) = rhs__(5,1)
rhs__(1,1) = rhs__(1,0)
rhs__(2,1) = rhs__(2,0)
rhs__(3,1) = rhs__(3,0)
rhs__(4,1) = rhs__(4,0)
rhs__(5,1) = rhs__(5,0)
do j = problem_size-4, 0, -1
rhs__(1,0) = rhs(1,i,j,k)
rhs__(2,0) = rhs(2,i,j,k)
rhs__(3,0) = rhs(3,i,j,k)
rhs__(4,0) = rhs(4,i,j,k)
rhs__(5,0) = rhs(5,i,j,k)
rhs__(1,0) = rhs__(1,0) -
> lhs(0,4,i,j,k)*rhs__(1,1) -
> lhs(0,5,i,j,k)*rhs__(1,2)
rhs__(2,0) = rhs__(2,0) -
> lhs(0,4,i,j,k)*rhs__(2,1) -
> lhs(0,5,i,j,k)*rhs__(2,2)
rhs__(3,0) = rhs__(3,0) -
> lhs(0,4,i,j,k)*rhs__(3,1) -
> lhs(0,5,i,j,k)*rhs__(3,2)
rhs__(4,0) = rhs__(4,0) -
> lhs(1,4,i,j,k)*rhs__(4,1) -
> lhs(1,5,i,j,k)*rhs__(4,2)
rhs__(5,0) = rhs__(5,0) -
> lhs(2,4,i,j,k)*rhs__(5,1) -
> lhs(2,5,i,j,k)*rhs__(5,2)
t1 = bt * rhs__(1,2)
t2 = 0.5d0 * (rhs__(4,2) + rhs__(5,2))
rhs(1,i,j+2,k) = bt * (rhs__(4,2) - rhs__(5,2))
rhs(2,i,j+2,k) = -rhs__(3,2)
rhs(3,i,j+2,k) = rhs__(2,2)
rhs(4,i,j+2,k) = -t1 + t2
rhs(5,i,j+2,k) = t1 + t2
rhs__(1,2) = rhs__(1,1)
rhs__(2,2) = rhs__(2,1)
rhs__(3,2) = rhs__(3,1)
rhs__(4,2) = rhs__(4,1)
rhs__(5,2) = rhs__(5,1)
rhs__(1,1) = rhs__(1,0)
rhs__(2,1) = rhs__(2,0)
rhs__(3,1) = rhs__(3,0)
rhs__(4,1) = rhs__(4,0)
rhs__(5,1) = rhs__(5,0)
enddo
t1 = bt * rhs__(1,2)
t2 = 0.5d0 * (rhs__(4,2) + rhs__(5,2))
rhs(1,i,j+2,k) = bt * (rhs__(4,2) - rhs__(5,2))
rhs(2,i,j+2,k) = -rhs__(3,2)
rhs(3,i,j+2,k) = rhs__(2,2)
rhs(4,i,j+2,k) = -t1 + t2
rhs(5,i,j+2,k) = t1 + t2
enddo
enddo
!DVM$ end region
if (timeron) call timer_stop(t_ysolve)
return
end

View File

@@ -0,0 +1,330 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine y_solve
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c this function performs the solution of the approximate factorization
c step in the y-direction for all five matrix components
c simultaneously. The Thomas algorithm is employed to solve the
c systems for the y-lines. Boundary conditions are non-periodic
c---------------------------------------------------------------------
include 'header.h'
integer i, j, k, j1, j2, m, m1
double precision ru1, fac1, fac2, t1,t2,t3
double precision lhs__(5,0:2), lhsm__(5,0:2), lhsp__(5,0:2)
c---------------------------------------------------------------------
c---------------------------------------------------------------------
if (timeron) call timer_start(t_ysolve)
!DVM$ region local(lhs)
!DVM$ parallel (k,j,i) on rhs(*,i,j,k)
!DVM$& ,private(m,j1,j2,fac1,fac2,ru1,lhs__,lhsp__,lhsm__)
!DVM$& ,ACROSS(OUT:rhs(0:0,0:0,0:2,0:0), lhs(0:0,0:0,0:0,0:2,0:0))
!DVM$& ,stage(stage_n)
do k = 1, nz2
do j = 0, problem_size-1
do i = 1, nx2
if (j .eq. 0) then
lhs__(1,0) = 0.0d0
lhsp__(1,0) = 0.0d0
lhsm__(1,0) = 0.0d0
lhs__(2,0) = 0.0d0
lhsp__(2,0) = 0.0d0
lhsm__(2,0) = 0.0d0
lhs__(3,0) = 1.0d0
lhsp__(3,0) = 1.0d0
lhsm__(3,0) = 1.0d0
lhs__(4,0) = 0.0d0
lhsp__(4,0) = 0.0d0
lhsm__(4,0) = 0.0d0
lhs__(5,0) = 0.0d0
lhsp__(5,0) = 0.0d0
lhsm__(5,0) = 0.0d0
lhs__(1,1) = 0.0d0
ru1 = c3c4*1.0d0/u(1,i,j,k)
ru1 = dmax1( dy3 + con43 * ru1,
> dy5 + c1c5*ru1,
> dymax + ru1,
> dy1)
lhs__(2,1) = - dtty2 *
> u(3,i,0,k) * (1.0d0/u(1,i,j,k))- dtty1 * ru1
ru1 = c3c4*1.0d0/u(1,i,j+1,k)
ru1 = dmax1( dy3 + con43 * ru1,
> dy5 + c1c5*ru1,
> dymax + ru1,
> dy1)
lhs__(3,1) = 1.0d0 + c2dtty1 * ru1
ru1 = c3c4*1.0d0/u(1,i,j+2,k)
ru1 = dmax1( dy3 + con43 * ru1,
> dy5 + c1c5*ru1,
> dymax + ru1,
> dy1)
lhs__(4,1) = dtty2 *
> u(3,i,2,k) * (1.0d0/u(1,i,j+2,k)) - dtty1 * ru1
lhs__(5,1) = 0.0d0
lhs__(3,1) = lhs__(3,1) + comz5
lhs__(4,1) = lhs__(4,1) - comz4
lhs__(5,1) = lhs__(5,1) + comz1
lhsp__(1,1) = lhs__(1,1)
lhsp__(2,1) = lhs__(2,1) - dtty2 * speed(i,j,k)
lhsp__(3,1) = lhs__(3,1)
lhsp__(4,1) = lhs__(4,1) + dtty2 * speed(i,j+2,k)
lhsp__(5,1) = lhs__(5,1)
lhsm__(1,1) = lhs__(1,1)
lhsm__(2,1) = lhs__(2,1) + dtty2 * speed(i,j,k)
lhsm__(3,1) = lhs__(3,1)
lhsm__(4,1) = lhs__(4,1) - dtty2 * speed(i,j+2,k)
lhsm__(5,1) = lhs__(5,1)
else
do m = 1, 5
lhs__(m,0) = lhs(0,m,i,j,k)
lhsp__(m,0) = lhs(1,m,i,j,k)
lhsm__(m,0) = lhs(2,m,i,j,k)
lhs__(m,1) = lhs(0,m,i,j+1,k)
lhsp__(m,1) = lhs(1,m,i,j+1,k)
lhsm__(m,1) = lhs(2,m,i,j+1,k)
enddo
endif
if(j + 2 .lt. ny2 + 1) then
m = j + 2
lhs__(1,2) = 0.0d0
ru1 = c3c4*1.0d0/u(1,i,m-1,k)
ru1 = dmax1( dy3 + con43 * ru1,
> dy5 + c1c5*ru1,
> dymax + ru1,
> dy1)
lhs__(2,2) = - dtty2 *
> u(3,i,m-1,k) * (1.0d0/u(1,i,m-1,k)) - dtty1 * ru1
ru1 = c3c4*1.0d0/u(1,i,m,k)
ru1 = dmax1( dy3 + con43 * ru1,
> dy5 + c1c5*ru1,
> dymax + ru1,
> dy1)
lhs__(3,2) = 1.0d0 + c2dtty1 * ru1
ru1 = c3c4*1.0d0/u(1,i,m+1,k)
ru1 = dmax1( dy3 + con43 * ru1,
> dy5 + c1c5*ru1,
> dymax + ru1,
> dy1)
lhs__(4,2) = dtty2 *
> u(3,i,m+1,k) * (1.0d0/u(1,i,m+1,k)) - dtty1 * ru1
lhs__(5,2) = 0.0d0
if(m .eq. 1) then
lhs__(3,2) = lhs__(3,2) + comz5
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .eq. 2) then
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .ge. 3 .and. m .le. ny2-2) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .eq. ny2-1) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
else if(m .eq. ny2) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz5
endif
lhsp__(1,2) = lhs__(1,2)
lhsp__(2,2) = lhs__(2,2) - dtty2 * speed(i,m-1,k)
lhsp__(3,2) = lhs__(3,2)
lhsp__(4,2) = lhs__(4,2) + dtty2 * speed(i,m+1,k)
lhsp__(5,2) = lhs__(5,2)
lhsm__(1,2) = lhs__(1,2)
lhsm__(2,2) = lhs__(2,2) + dtty2 * speed(i,m-1,k)
lhsm__(3,2) = lhs__(3,2)
lhsm__(4,2) = lhs__(4,2) - dtty2 * speed(i,m+1,k)
lhsm__(5,2) = lhs__(5,2)
else if(j + 2 .eq. ny2+1) then
lhs__(1,2) = 0.0d0
lhsp__(1,2) = 0.0d0
lhsm__(1,2) = 0.0d0
lhs__(2,2) = 0.0d0
lhsp__(2,2) = 0.0d0
lhsm__(2,2) = 0.0d0
lhs__(3,2) = 1.0d0
lhsp__(3,2) = 1.0d0
lhsm__(3,2) = 1.0d0
lhs__(4,2) = 0.0d0
lhsp__(4,2) = 0.0d0
lhsm__(4,2) = 0.0d0
lhs__(5,2) = 0.0d0
lhsp__(5,2) = 0.0d0
lhsm__(5,2) = 0.0d0
endif
!********************************** end of init
j1 = j + 1
j2 = j + 2
fac1 = 1.d0/lhs__(3,0)
lhs__(4,0) = fac1*lhs__(4,0)
lhs__(5,0) = fac1*lhs__(5,0)
do m = 1, 3
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
end do
if(j .le. ny2-1) then
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
lhs__(2,2)=lhs__(2,2)-lhs__(1,2)*lhs__(4,0)
lhs__(3,2)=lhs__(3,2)-lhs__(1,2)*lhs__(5,0)
do m = 1, 3
rhs(m,i,j1,k)=rhs(m,i,j1,k)-lhs__(2,1)*rhs(m,i,j,k)
rhs(m,i,j2,k)=rhs(m,i,j2,k)-lhs__(1,2)*rhs(m,i,j,k)
end do
else
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
if (lhs__(3,1) .ne. 0) then
fac2 = 1.d0/lhs__(3,1)
else
fac2 = 0
endif
do m = 1, 3
rhs(m,i,j1,k) = rhs(m,i,j1,k)-lhs__(2,1)*rhs(m,i,j,k)
rhs(m,i,j1,k) = fac2*rhs(m,i,j1,k)
end do
endif
m = 4
fac1 = 1.d0/lhsp__(3,0)
lhsp__(4,0) = fac1*lhsp__(4,0)
lhsp__(5,0) = fac1*lhsp__(5,0)
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
lhsp__(3,1)=lhsp__(3,1)-lhsp__(2,1)*lhsp__(4,0)
lhsp__(4,1)=lhsp__(4,1)-lhsp__(2,1)*lhsp__(5,0)
rhs(m,i,j1,k) = rhs(m,i,j1,k)-lhsp__(2,1)*rhs(m,i,j,k)
if(j .lt. ny2) then
lhsp__(2,2)=lhsp__(2,2)-lhsp__(1,2)*lhsp__(4,0)
lhsp__(3,2)=lhsp__(3,2)-lhsp__(1,2)*lhsp__(5,0)
rhs(m,i,j2,k)=rhs(m,i,j2,k)-lhsp__(1,2)*rhs(m,i,j,k)
endif
m = 5
fac1 = 1.d0/lhsm__(3,0)
lhsm__(4,0) = fac1*lhsm__(4,0)
lhsm__(5,0) = fac1*lhsm__(5,0)
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
lhsm__(3,1)=lhsm__(3,1)-lhsm__(2,1)*lhsm__(4,0)
lhsm__(4,1)=lhsm__(4,1)-lhsm__(2,1)*lhsm__(5,0)
rhs(m,i,j1,k)=rhs(m,i,j1,k) -lhsm__(2,1)*rhs(m,i,j,k)
if(j .lt. ny2) then
lhsm__(2,2)=lhsm__(2,2)-lhsm__(1,2)*lhsm__(4,0)
lhsm__(3,2)=lhsm__(3,2)-lhsm__(1,2)*lhsm__(5,0)
rhs(m,i,j2,k)=rhs(m,i,j2,k) -lhsm__(1,2)*rhs(m,i,j,k)
endif
if(j .eq. ny2) then
rhs(4,i,j1,k) = rhs(4,i,j1,k)/lhsp__(3,1)
rhs(5,i,j1,k) = rhs(5,i,j1,k)/lhsm__(3,1)
do m = 1, 3
rhs(m,i,j,k) = rhs(m,i,j,k)-lhs__(4,0)*rhs(m,i,j1,k)
end do
rhs(4,i,j,k) = rhs(4,i,j,k)-lhsp__(4,0)*rhs(4,i,j1,k)
rhs(5,i,j,k) = rhs(5,i,j,k)-lhsm__(4,0)*rhs(5,i,j1,k)
endif
do m = 1,5
lhs(0,m,i,j,k) = lhs__(m,0)
lhs(1,m,i,j,k) = lhsp__(m,0)
lhs(2,m,i,j,k) = lhsm__(m,0)
lhs(0,m,i,j+1,k) = lhs__(m,1)
lhs(1,m,i,j+1,k) = lhsp__(m,1)
lhs(2,m,i,j+1,k) = lhsm__(m,1)
if (j .lt. ny2) then
lhs(0,m,i,j+2,k) = lhs__(m,2)
lhs(1,m,i,j+2,k) = lhsp__(m,2)
lhs(2,m,i,j+2,k) = lhsm__(m,2)
endif
enddo
enddo
enddo
enddo
!DVM$ parallel (k,j,i) on rhs(*,i,j,k),ACROSS(rhs(0:0,0:0,0:2,0:0))
!DVM$& ,stage(stage_n)
do k = 1, nz2
do j = problem_size-3, 0, -1
do i = 1, nx2
rhs(1,i,j,k) = rhs(1,i,j,k) -
& lhs(0,4,i,j,k)*rhs(1,i,j+1,k) -
& lhs(0,5,i,j,k)*rhs(1,i,j+2,k)
rhs(2,i,j,k) = rhs(2,i,j,k) -
& lhs(0,4,i,j,k)*rhs(2,i,j+1,k) -
& lhs(0,5,i,j,k)*rhs(2,i,j+2,k)
rhs(3,i,j,k) = rhs(3,i,j,k) -
& lhs(0,4,i,j,k)*rhs(3,i,j+1,k) -
& lhs(0,5,i,j,k)*rhs(3,i,j+2,k)
rhs(4,i,j,k) = rhs(4,i,j,k) -
& lhs(1,4,i,j,k)*rhs(4,i,j+1,k) -
& lhs(1,5,i,j,k)*rhs(4,i,j+2,k)
rhs(5,i,j,k) = rhs(5,i,j,k) -
& lhs(2,4,i,j,k)*rhs(5,i,j+1,k) -
& lhs(2,5,i,j,k)*rhs(5,i,j+2,k)
enddo
enddo
enddo
!DVM$ parallel (k,j,i) on rhs(*,i,j,k),PRIVATE(t1,t2,t3)
do k = 1, nz2
do j = 1, ny2
do i = 1, nx2
t1 = bt * rhs(1,i,j,k)
t2 = 0.5d0 * (rhs(4,i,j,k) + rhs(5,i,j,k))
t3 = rhs(2,i,j,k)
rhs(1,i,j,k) = bt * (rhs(4,i,j,k) - rhs(5,i,j,k))
rhs(2,i,j,k) = -rhs(3,i,j,k)
rhs(3,i,j,k) = t3
rhs(4,i,j,k) = -t1 + t2
rhs(5,i,j,k) = t1 + t2
enddo
enddo
enddo
!DVM$ end region
if (timeron) call timer_stop(t_ysolve)
return
end

View File

@@ -0,0 +1,433 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine z_solve
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c this function performs the solution of the approximate factorization
c step in the z-direction for all five matrix components
c simultaneously. The Thomas algorithm is employed to solve the
c systems for the z-lines. Boundary conditions are non-periodic
c---------------------------------------------------------------------
include 'header.h'
integer i, j, k, k1, k2, m, m1
double precision ru1, fac1, fac2, rhs__(5,0:2)
double precision lhs__(5,0:2), lhsm__(5,0:2), lhsp__(5,0:2)
double precision t1,t2,t3,ac,xvel,yvel,zvel,btuz,ac2u,uzik1
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c Prepare for z-solve, array redistribution
c---------------------------------------------------------------------
if (timeron) call timer_start(t_zsolve)
!DVM$ region local(lhs)
!DVM$ parallel (j,i) on u(*,i,j,*)
!DVM$& , CUDA_BLOCK(32,4)
!DVM$& ,private(m,k1,k2,ru1,fac1,fac2,k,lhs__,lhsp__,lhsm__,rhs__,
!DVM$& t1,t2,t3,ac,xvel,yvel,zvel,btuz,ac2u,uzik1)
do j = 1, ny2
do i = 1, nx2
lhs__(1,0) = 0.0d0
lhsp__(1,0) = 0.0d0
lhsm__(1,0) = 0.0d0
lhs__(2,0) = 0.0d0
lhsp__(2,0) = 0.0d0
lhsm__(2,0) = 0.0d0
lhs__(3,0) = 1.0d0
lhsp__(3,0) = 1.0d0
lhsm__(3,0) = 1.0d0
lhs__(4,0) = 0.0d0
lhsp__(4,0) = 0.0d0
lhsm__(4,0) = 0.0d0
lhs__(5,0) = 0.0d0
lhsp__(5,0) = 0.0d0
lhsm__(5,0) = 0.0d0
lhs__(1,1) = 0.0d0
ru1 = c3c4*1.0d0/u(1,i,j,0)
ru1 = dmax1(dz4 + con43 * ru1,
> dz5 + c1c5 * ru1,
> dzmax + ru1,
> dz1)
lhs__(2,1) = - dttz2 * ws(i,j,0) - dttz1 * ru1
ru1 = c3c4*1.0d0/u(1,i,j,1)
ru1 = dmax1(dz4 + con43 * ru1,
> dz5 + c1c5 * ru1,
> dzmax + ru1,
> dz1)
lhs__(3,1) = 1.0d0 + c2dttz1 * ru1
ru1 = c3c4*1.0d0/u(1,i,j,2)
ru1 = dmax1(dz4 + con43 * ru1,
> dz5 + c1c5 * ru1,
> dzmax + ru1,
> dz1)
lhs__(4,1) = dttz2 * ws(i,j,2) - dttz1 * ru1
lhs__(5,1) = 0.0d0
lhs__(3,1) = lhs__(3,1) + comz5
lhs__(4,1) = lhs__(4,1) - comz4
lhs__(5,1) = lhs__(5,1) + comz1
lhsp__(1,1) = lhs__(1,1)
lhsp__(2,1) = lhs__(2,1) - dttz2 * speed(i,j,1-1)
lhsp__(3,1) = lhs__(3,1)
lhsp__(4,1) = lhs__(4,1) + dttz2 * speed(i,j,1+1)
lhsp__(5,1) = lhs__(5,1)
lhsm__(1,1) = lhs__(1,1)
lhsm__(2,1) = lhs__(2,1) + dttz2 * speed(i,j,1-1)
lhsm__(3,1) = lhs__(3,1)
lhsm__(4,1) = lhs__(4,1) - dttz2 * speed(i,j,1+1)
lhsm__(5,1) = lhs__(5,1)
do k = 0, nz2+1
if(k + 2 .lt. nz2 + 1) then
m = k + 2
lhs__(1,2) = 0.0d0
ru1 = c3c4*1.0d0/u(1,i,j,m-1)
ru1 = dmax1(dz4 + con43 * ru1,
> dz5 + c1c5 * ru1,
> dzmax + ru1,
> dz1)
lhs__(2,2) = - dttz2 * ws(i,j,m-1) - dttz1 * ru1
ru1 = c3c4*1.0d0/u(1,i,j,m)
ru1 = dmax1(dz4 + con43 * ru1,
> dz5 + c1c5 * ru1,
> dzmax + ru1,
> dz1)
lhs__(3,2) = 1.0d0 + c2dttz1 * ru1
ru1 = c3c4*1.0d0/u(1,i,j,m+1)
ru1 = dmax1(dz4 + con43 * ru1,
> dz5 + c1c5 * ru1,
> dzmax + ru1,
> dz1)
lhs__(4,2) = dttz2 * ws(i,j,m+1) - dttz1 * ru1
lhs__(5,2) = 0.0d0
if(m .eq. 1) then
lhs__(3,2) = lhs__(3,2) + comz5
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .eq. 2) then
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .ge. 3 .and. m .le. nz2-2) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .eq. nz2-1) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
else if(m .eq. nz2) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz5
endif
lhsp__(1,2) = lhs__(1,2)
lhsp__(2,2) = lhs__(2,2) - dttz2 * speed(i,j,m-1)
lhsp__(3,2) = lhs__(3,2)
lhsp__(4,2) = lhs__(4,2) + dttz2 * speed(i,j,m+1)
lhsp__(5,2) = lhs__(5,2)
lhsm__(1,2) = lhs__(1,2)
lhsm__(2,2) = lhs__(2,2) + dttz2 * speed(i,j,m-1)
lhsm__(3,2) = lhs__(3,2)
lhsm__(4,2) = lhs__(4,2) - dttz2 * speed(i,j,m+1)
lhsm__(5,2) = lhs__(5,2)
else if(k + 2 .eq. nz2+1) then
lhs__(1,2) = 0.0d0
lhsp__(1,2) = 0.0d0
lhsm__(1,2) = 0.0d0
lhs__(2,2) = 0.0d0
lhsp__(2,2) = 0.0d0
lhsm__(2,2) = 0.0d0
lhs__(3,2) = 1.0d0
lhsp__(3,2) = 1.0d0
lhsm__(3,2) = 1.0d0
lhs__(4,2) = 0.0d0
lhsp__(4,2) = 0.0d0
lhsm__(4,2) = 0.0d0
lhs__(5,2) = 0.0d0
lhsp__(5,2) = 0.0d0
lhsm__(5,2) = 0.0d0
endif
!********************************** end of init
k1 = k + 1
k2 = k + 2
fac1 = 1.d0/lhs__(3,0)
lhs__(4,0) = fac1*lhs__(4,0)
lhs__(5,0) = fac1*lhs__(5,0)
do m = 1, 3
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
end do
if(k .le. nz2-1) then
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
lhs__(2,2)=lhs__(2,2)-lhs__(1,2)*lhs__(4,0)
lhs__(3,2)=lhs__(3,2)-lhs__(1,2)*lhs__(5,0)
do m = 1, 3
rhs(m,i,j,k1)=rhs(m,i,j,k1)-lhs__(2,1)*rhs(m,i,j,k)
rhs(m,i,j,k2)=rhs(m,i,j,k2)-lhs__(1,2)*rhs(m,i,j,k)
end do
else
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
fac2 = 1.d0/lhs__(3,1)
do m = 1, 3
rhs(m,i,j,k1) = rhs(m,i,j,k1)-lhs__(2,1)*rhs(m,i,j,k)
rhs(m,i,j,k1) = fac2*rhs(m,i,j,k1)
end do
endif
m = 4
fac1 = 1.d0/lhsp__(3,0)
lhsp__(4,0) = fac1*lhsp__(4,0)
lhsp__(5,0) = fac1*lhsp__(5,0)
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
lhsp__(3,1)=lhsp__(3,1)-lhsp__(2,1)*lhsp__(4,0)
lhsp__(4,1)=lhsp__(4,1)-lhsp__(2,1)*lhsp__(5,0)
rhs(m,i,j,k1) = rhs(m,i,j,k1)-lhsp__(2,1)*rhs(m,i,j,k)
if(k .lt. nz2) then
lhsp__(2,2)=lhsp__(2,2)-lhsp__(1,2)*lhsp__(4,0)
lhsp__(3,2)=lhsp__(3,2)-lhsp__(1,2)*lhsp__(5,0)
rhs(m,i,j,k2)=rhs(m,i,j,k2)-lhsp__(1,2)*rhs(m,i,j,k)
endif
m = 5
fac1 = 1.d0/lhsm__(3,0)
lhsm__(4,0) = fac1*lhsm__(4,0)
lhsm__(5,0) = fac1*lhsm__(5,0)
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
lhsm__(3,1)=lhsm__(3,1)-lhsm__(2,1)*lhsm__(4,0)
lhsm__(4,1)=lhsm__(4,1)-lhsm__(2,1)*lhsm__(5,0)
rhs(m,i,j,k1)=rhs(m,i,j,k1) -lhsm__(2,1)*rhs(m,i,j,k)
if(k .lt. nz2) then
lhsm__(2,2)=lhsm__(2,2)-lhsm__(1,2)*lhsm__(4,0)
lhsm__(3,2)=lhsm__(3,2)-lhsm__(1,2)*lhsm__(5,0)
rhs(m,i,j,k2)=rhs(m,i,j,k2) -lhsm__(1,2)*rhs(m,i,j,k)
endif
if(k .eq. nz2) then
rhs(4,i,j,k1) = rhs(4,i,j,k1)/lhsp__(3,1)
rhs(5,i,j,k1) = rhs(5,i,j,k1)/lhsm__(3,1)
do m = 1, 3
rhs(m,i,j,k) = rhs(m,i,j,k)-lhs__(4,0)*rhs(m,i,j,k1)
end do
rhs(4,i,j,k) = rhs(4,i,j,k)-lhsp__(4,0)*rhs(4,i,j,k1)
rhs(5,i,j,k) = rhs(5,i,j,k)-lhsm__(4,0)*rhs(5,i,j,k1)
endif
lhs(0,4,i,j,k) = lhs__(4,0)
lhs(1,4,i,j,k) = lhsp__(4,0)
lhs(2,4,i,j,k) = lhsm__(4,0)
lhs(0,5,i,j,k) = lhs__(5,0)
lhs(1,5,i,j,k) = lhsp__(5,0)
lhs(2,5,i,j,k) = lhsm__(5,0)
lhs__(1,0) = lhs__(1,1)
lhsp__(1,0) = lhsp__(1,1)
lhsm__(1,0) = lhsm__(1,1)
lhs__(1,1) = lhs__(1,2)
lhsp__(1,1) = lhsp__(1,2)
lhsm__(1,1) = lhsm__(1,2)
lhs__(2,0) = lhs__(2,1)
lhsp__(2,0) = lhsp__(2,1)
lhsm__(2,0) = lhsm__(2,1)
lhs__(2,1) = lhs__(2,2)
lhsp__(2,1) = lhsp__(2,2)
lhsm__(2,1) = lhsm__(2,2)
lhs__(3,0) = lhs__(3,1)
lhsp__(3,0) = lhsp__(3,1)
lhsm__(3,0) = lhsm__(3,1)
lhs__(3,1) = lhs__(3,2)
lhsp__(3,1) = lhsp__(3,2)
lhsm__(3,1) = lhsm__(3,2)
lhs__(4,0) = lhs__(4,1)
lhsp__(4,0) = lhsp__(4,1)
lhsm__(4,0) = lhsm__(4,1)
lhs__(4,1) = lhs__(4,2)
lhsp__(4,1) = lhsp__(4,2)
lhsm__(4,1) = lhsm__(4,2)
lhs__(5,0) = lhs__(5,1)
lhsp__(5,0) = lhsp__(5,1)
lhsm__(5,0) = lhsm__(5,1)
lhs__(5,1) = lhs__(5,2)
lhsp__(5,1) = lhsp__(5,2)
lhsm__(5,1) = lhsm__(5,2)
enddo
k = problem_size-3
rhs__(1,2) = rhs(1,i,j,k+2)
rhs__(2,2) = rhs(2,i,j,k+2)
rhs__(3,2) = rhs(3,i,j,k+2)
rhs__(4,2) = rhs(4,i,j,k+2)
rhs__(5,2) = rhs(5,i,j,k+2)
rhs__(1,1) = rhs(1,i,j,k+1)
rhs__(2,1) = rhs(2,i,j,k+1)
rhs__(3,1) = rhs(3,i,j,k+1)
rhs__(4,1) = rhs(4,i,j,k+1)
rhs__(5,1) = rhs(5,i,j,k+1)
rhs__(1,0) = rhs(1,i,j,k)
rhs__(2,0) = rhs(2,i,j,k)
rhs__(3,0) = rhs(3,i,j,k)
rhs__(4,0) = rhs(4,i,j,k)
rhs__(5,0) = rhs(5,i,j,k)
rhs__(1,0) = rhs__(1,0) -
> lhs(0,4,i,j,k)*rhs__(1,1) -
> lhs(0,5,i,j,k)*rhs__(1,2)
rhs__(2,0) = rhs__(2,0) -
> lhs(0,4,i,j,k)*rhs__(2,1) -
> lhs(0,5,i,j,k)*rhs__(2,2)
rhs__(3,0) = rhs__(3,0) -
> lhs(0,4,i,j,k)*rhs__(3,1) -
> lhs(0,5,i,j,k)*rhs__(3,2)
rhs__(4,0) = rhs__(4,0) -
> lhs(1,4,i,j,k)*rhs__(4,1) -
> lhs(1,5,i,j,k)*rhs__(4,2)
rhs__(5,0) = rhs__(5,0) -
> lhs(2,4,i,j,k)*rhs__(5,1) -
> lhs(2,5,i,j,k)*rhs__(5,2)
rhs__(1,2) = rhs__(1,1)
rhs__(2,2) = rhs__(2,1)
rhs__(3,2) = rhs__(3,1)
rhs__(4,2) = rhs__(4,1)
rhs__(5,2) = rhs__(5,1)
rhs__(1,1) = rhs__(1,0)
rhs__(2,1) = rhs__(2,0)
rhs__(3,1) = rhs__(3,0)
rhs__(4,1) = rhs__(4,0)
rhs__(5,1) = rhs__(5,0)
do k = problem_size-4, 0, -1
rhs__(1,0) = rhs(1,i,j,k)
rhs__(2,0) = rhs(2,i,j,k)
rhs__(3,0) = rhs(3,i,j,k)
rhs__(4,0) = rhs(4,i,j,k)
rhs__(5,0) = rhs(5,i,j,k)
rhs__(1,0) = rhs__(1,0) -
> lhs(0,4,i,j,k)*rhs__(1,1) -
> lhs(0,5,i,j,k)*rhs__(1,2)
rhs__(2,0) = rhs__(2,0) -
> lhs(0,4,i,j,k)*rhs__(2,1) -
> lhs(0,5,i,j,k)*rhs__(2,2)
rhs__(3,0) = rhs__(3,0) -
> lhs(0,4,i,j,k)*rhs__(3,1) -
> lhs(0,5,i,j,k)*rhs__(3,2)
rhs__(4,0) = rhs__(4,0) -
> lhs(1,4,i,j,k)*rhs__(4,1) -
> lhs(1,5,i,j,k)*rhs__(4,2)
rhs__(5,0) = rhs__(5,0) -
> lhs(2,4,i,j,k)*rhs__(5,1) -
> lhs(2,5,i,j,k)*rhs__(5,2)
xvel = us(i,j,k+2)
yvel = vs(i,j,k+2)
zvel = ws(i,j,k+2)
ac = speed(i,j,k+2)
ac2u = ac*ac
uzik1 = u(1,i,j,k+2)
btuz = bt * uzik1
t1 = btuz/ac * (rhs__(4,2) + rhs__(5,2))
t2 = rhs__(3,2) + t1
t3 = btuz * (rhs__(4,2) - rhs__(5,2))
rhs__(3,2) = uzik1*rhs__(1,2) + yvel*t2
rhs__(4,2) = zvel*t2 + t3
rhs__(5,2) = uzik1*(-xvel*rhs__(2,2) +
> yvel*rhs__(1,2)) + qs(i,j,k+2)*t2 +
> c2iv*ac2u*t1 + zvel*t3
rhs__(1,2) = t2
rhs__(2,2) = -uzik1*rhs__(2,2) + xvel*t2
u(1,i,j,k+2) = u(1,i,j,k+2) + rhs__(1,2)
u(2,i,j,k+2) = u(2,i,j,k+2) + rhs__(2,2)
u(3,i,j,k+2) = u(3,i,j,k+2) + rhs__(3,2)
u(4,i,j,k+2) = u(4,i,j,k+2) + rhs__(4,2)
u(5,i,j,k+2) = u(5,i,j,k+2) + rhs__(5,2)
rhs__(1,2) = rhs__(1,1)
rhs__(2,2) = rhs__(2,1)
rhs__(3,2) = rhs__(3,1)
rhs__(4,2) = rhs__(4,1)
rhs__(5,2) = rhs__(5,1)
rhs__(1,1) = rhs__(1,0)
rhs__(2,1) = rhs__(2,0)
rhs__(3,1) = rhs__(3,0)
rhs__(4,1) = rhs__(4,0)
rhs__(5,1) = rhs__(5,0)
end do
xvel = us(i,j,k+2)
yvel = vs(i,j,k+2)
zvel = ws(i,j,k+2)
ac = speed(i,j,k+2)
ac2u = ac*ac
uzik1 = u(1,i,j,k+2)
btuz = bt * uzik1
t1 = btuz/ac * (rhs__(4,2) + rhs__(5,2))
t2 = rhs__(3,2) + t1
t3 = btuz * (rhs__(4,2) - rhs__(5,2))
rhs__(3,2) = uzik1*rhs__(1,2) + yvel*t2
rhs__(4,2) = zvel*t2 + t3
rhs__(5,2) = uzik1*(-xvel*rhs__(2,2) +
> yvel*rhs__(1,2)) + qs(i,j,k+2)*t2 +
> c2iv*ac2u*t1 + zvel*t3
rhs__(1,2) = t2
rhs__(2,2) = -uzik1*rhs__(2,2) + xvel*t2
u(1,i,j,k+2) = u(1,i,j,k+2) + rhs__(1,2)
u(2,i,j,k+2) = u(2,i,j,k+2) + rhs__(2,2)
u(3,i,j,k+2) = u(3,i,j,k+2) + rhs__(3,2)
u(4,i,j,k+2) = u(4,i,j,k+2) + rhs__(4,2)
u(5,i,j,k+2) = u(5,i,j,k+2) + rhs__(5,2)
enddo
enddo
!DVM$ end region
if (timeron) call timer_stop(t_zsolve)
return
end

View File

@@ -0,0 +1,338 @@
c---------------------------------------------------------------------
c---------------------------------------------------------------------
subroutine z_solve
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c this function performs the solution of the approximate factorization
c step in the z-direction for all five matrix components
c simultaneously. The Thomas algorithm is employed to solve the
c systems for the z-lines. Boundary conditions are non-periodic
c---------------------------------------------------------------------
include 'header.h'
integer i, j, k, k1, k2, m
double precision ru1, fac1, fac2
double precision lhs__(5,0:2), lhsm__(5,0:2), lhsp__(5,0:2)
double precision t1,t2,t3,ac,xvel,yvel,zvel,btuz,ac2u,uzik1
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c---------------------------------------------------------------------
c Prepare for z-solve, array redistribution
c---------------------------------------------------------------------
if (timeron) call timer_start(t_zsolve)
!DVM$ region local(lhs)
!DVM$ parallel (k,j,i) on rhs(*,i,j,k)
!DVM$& ,private(m,k1,k2,ru1,fac1,fac2,k,lhs__,lhsp__,lhsm__)
!DVM$& ,ACROSS(OUT:rhs(0:0,0:0,0:0,0:2), lhs(0:0,0:0,0:0,0:0,0:2))
!DVM$& ,stage(stage_n)
do k = 0, problem_size-1
do j = 1, ny2
do i = 1, nx2
if (k .eq. 0) then
lhs__(1,0) = 0.0d0
lhsp__(1,0) = 0.0d0
lhsm__(1,0) = 0.0d0
lhs__(2,0) = 0.0d0
lhsp__(2,0) = 0.0d0
lhsm__(2,0) = 0.0d0
lhs__(3,0) = 1.0d0
lhsp__(3,0) = 1.0d0
lhsm__(3,0) = 1.0d0
lhs__(4,0) = 0.0d0
lhsp__(4,0) = 0.0d0
lhsm__(4,0) = 0.0d0
lhs__(5,0) = 0.0d0
lhsp__(5,0) = 0.0d0
lhsm__(5,0) = 0.0d0
lhs__(1,1) = 0.0d0
ru1 = c3c4*1.0d0/u(1,i,j,k)
ru1 = dmax1(dz4 + con43 * ru1,
> dz5 + c1c5 * ru1,
> dzmax + ru1,
> dz1)
lhs__(2,1) = - dttz2 * ws(i,j,k) - dttz1 * ru1
ru1 = c3c4*1.0d0/u(1,i,j,k+1)
ru1 = dmax1(dz4 + con43 * ru1,
> dz5 + c1c5 * ru1,
> dzmax + ru1,
> dz1)
lhs__(3,1) = 1.0d0 + c2dttz1 * ru1
ru1 = c3c4*1.0d0/u(1,i,j,k+2)
ru1 = dmax1(dz4 + con43 * ru1,
> dz5 + c1c5 * ru1,
> dzmax + ru1,
> dz1)
lhs__(4,1) = dttz2 * ws(i,j,k+2) - dttz1 * ru1
lhs__(5,1) = 0.0d0
lhs__(3,1) = lhs__(3,1) + comz5
lhs__(4,1) = lhs__(4,1) - comz4
lhs__(5,1) = lhs__(5,1) + comz1
lhsp__(1,1) = lhs__(1,1)
lhsp__(2,1) = lhs__(2,1) - dttz2 * speed(i,j,k)
lhsp__(3,1) = lhs__(3,1)
lhsp__(4,1) = lhs__(4,1) + dttz2 * speed(i,j,k+2)
lhsp__(5,1) = lhs__(5,1)
lhsm__(1,1) = lhs__(1,1)
lhsm__(2,1) = lhs__(2,1) + dttz2 * speed(i,j,k)
lhsm__(3,1) = lhs__(3,1)
lhsm__(4,1) = lhs__(4,1) - dttz2 * speed(i,j,k+2)
lhsm__(5,1) = lhs__(5,1)
else
do m = 1, 5
lhs__(m,0) = lhs(0,m,i,j,k)
lhsp__(m,0) = lhs(1,m,i,j,k)
lhsm__(m,0) = lhs(2,m,i,j,k)
lhs__(m,1) = lhs(0,m,i,j,k+1)
lhsp__(m,1) = lhs(1,m,i,j,k+1)
lhsm__(m,1) = lhs(2,m,i,j,k+1)
enddo
endif
if(k + 2 .lt. nz2 + 1) then
m = k + 2
lhs__(1,2) = 0.0d0
ru1 = c3c4*1.0d0/u(1,i,j,m-1)
ru1 = dmax1(dz4 + con43 * ru1,
> dz5 + c1c5 * ru1,
> dzmax + ru1,
> dz1)
lhs__(2,2) = - dttz2 * ws(i,j,m-1) - dttz1 * ru1
ru1 = c3c4*1.0d0/u(1,i,j,m)
ru1 = dmax1(dz4 + con43 * ru1,
> dz5 + c1c5 * ru1,
> dzmax + ru1,
> dz1)
lhs__(3,2) = 1.0d0 + c2dttz1 * ru1
ru1 = c3c4*1.0d0/u(1,i,j,m+1)
ru1 = dmax1(dz4 + con43 * ru1,
> dz5 + c1c5 * ru1,
> dzmax + ru1,
> dz1)
lhs__(4,2) = dttz2 * ws(i,j,m+1) - dttz1 * ru1
lhs__(5,2) = 0.0d0
if(m .eq. 1) then
lhs__(3,2) = lhs__(3,2) + comz5
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .eq. 2) then
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .ge. 3 .and. m .le. nz2-2) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
lhs__(5,2) = lhs__(5,2) + comz1
else if(m .eq. nz2-1) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz6
lhs__(4,2) = lhs__(4,2) - comz4
else if(m .eq. nz2) then
lhs__(1,2) = lhs__(1,2) + comz1
lhs__(2,2) = lhs__(2,2) - comz4
lhs__(3,2) = lhs__(3,2) + comz5
endif
lhsp__(1,2) = lhs__(1,2)
lhsp__(2,2) = lhs__(2,2) - dttz2 * speed(i,j,m-1)
lhsp__(3,2) = lhs__(3,2)
lhsp__(4,2) = lhs__(4,2) + dttz2 * speed(i,j,m+1)
lhsp__(5,2) = lhs__(5,2)
lhsm__(1,2) = lhs__(1,2)
lhsm__(2,2) = lhs__(2,2) + dttz2 * speed(i,j,m-1)
lhsm__(3,2) = lhs__(3,2)
lhsm__(4,2) = lhs__(4,2) - dttz2 * speed(i,j,m+1)
lhsm__(5,2) = lhs__(5,2)
else if(k + 2 .eq. nz2+1) then
lhs__(1,2) = 0.0d0
lhsp__(1,2) = 0.0d0
lhsm__(1,2) = 0.0d0
lhs__(2,2) = 0.0d0
lhsp__(2,2) = 0.0d0
lhsm__(2,2) = 0.0d0
lhs__(3,2) = 1.0d0
lhsp__(3,2) = 1.0d0
lhsm__(3,2) = 1.0d0
lhs__(4,2) = 0.0d0
lhsp__(4,2) = 0.0d0
lhsm__(4,2) = 0.0d0
lhs__(5,2) = 0.0d0
lhsp__(5,2) = 0.0d0
lhsm__(5,2) = 0.0d0
endif
!********************************** end of init
k1 = k + 1
k2 = k + 2
fac1 = 1.d0/lhs__(3,0)
lhs__(4,0) = fac1*lhs__(4,0)
lhs__(5,0) = fac1*lhs__(5,0)
do m = 1, 3
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
end do
if(k .le. nz2-1) then
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
lhs__(2,2)=lhs__(2,2)-lhs__(1,2)*lhs__(4,0)
lhs__(3,2)=lhs__(3,2)-lhs__(1,2)*lhs__(5,0)
do m = 1, 3
rhs(m,i,j,k1)=rhs(m,i,j,k1)-lhs__(2,1)*rhs(m,i,j,k)
rhs(m,i,j,k2)=rhs(m,i,j,k2)-lhs__(1,2)*rhs(m,i,j,k)
end do
else
lhs__(3,1)=lhs__(3,1)-lhs__(2,1)*lhs__(4,0)
lhs__(4,1)=lhs__(4,1)-lhs__(2,1)*lhs__(5,0)
if (lhs__(3,1) .ne. 0) then
fac2 = 1.d0/lhs__(3,1)
else
fac2 = 0
endif
do m = 1, 3
rhs(m,i,j,k1) = rhs(m,i,j,k1)-lhs__(2,1)*rhs(m,i,j,k)
rhs(m,i,j,k1) = fac2*rhs(m,i,j,k1)
end do
endif
m = 4
fac1 = 1.d0/lhsp__(3,0)
lhsp__(4,0) = fac1*lhsp__(4,0)
lhsp__(5,0) = fac1*lhsp__(5,0)
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
lhsp__(3,1)=lhsp__(3,1)-lhsp__(2,1)*lhsp__(4,0)
lhsp__(4,1)=lhsp__(4,1)-lhsp__(2,1)*lhsp__(5,0)
rhs(m,i,j,k1) = rhs(m,i,j,k1)-lhsp__(2,1)*rhs(m,i,j,k)
if(k .lt. nz2) then
lhsp__(2,2)=lhsp__(2,2)-lhsp__(1,2)*lhsp__(4,0)
lhsp__(3,2)=lhsp__(3,2)-lhsp__(1,2)*lhsp__(5,0)
rhs(m,i,j,k2)=rhs(m,i,j,k2)-lhsp__(1,2)*rhs(m,i,j,k)
endif
m = 5
fac1 = 1.d0/lhsm__(3,0)
lhsm__(4,0) = fac1*lhsm__(4,0)
lhsm__(5,0) = fac1*lhsm__(5,0)
rhs(m,i,j,k) = fac1*rhs(m,i,j,k)
lhsm__(3,1)=lhsm__(3,1)-lhsm__(2,1)*lhsm__(4,0)
lhsm__(4,1)=lhsm__(4,1)-lhsm__(2,1)*lhsm__(5,0)
rhs(m,i,j,k1)=rhs(m,i,j,k1) -lhsm__(2,1)*rhs(m,i,j,k)
if(k .lt. nz2) then
lhsm__(2,2)=lhsm__(2,2)-lhsm__(1,2)*lhsm__(4,0)
lhsm__(3,2)=lhsm__(3,2)-lhsm__(1,2)*lhsm__(5,0)
rhs(m,i,j,k2)=rhs(m,i,j,k2) -lhsm__(1,2)*rhs(m,i,j,k)
endif
if(k .eq. nz2) then
rhs(4,i,j,k1) = rhs(4,i,j,k1)/lhsp__(3,1)
rhs(5,i,j,k1) = rhs(5,i,j,k1)/lhsm__(3,1)
do m = 1, 3
rhs(m,i,j,k) = rhs(m,i,j,k)-lhs__(4,0)*rhs(m,i,j,k1)
end do
rhs(4,i,j,k) = rhs(4,i,j,k)-lhsp__(4,0)*rhs(4,i,j,k1)
rhs(5,i,j,k) = rhs(5,i,j,k)-lhsm__(4,0)*rhs(5,i,j,k1)
endif
do m = 1,5
lhs(0,m,i,j,k) = lhs__(m,0)
lhs(1,m,i,j,k) = lhsp__(m,0)
lhs(2,m,i,j,k) = lhsm__(m,0)
lhs(0,m,i,j,k+1) = lhs__(m,1)
lhs(1,m,i,j,k+1) = lhsp__(m,1)
lhs(2,m,i,j,k+1) = lhsm__(m,1)
if (k .lt. nz2) then
lhs(0,m,i,j,k+2) = lhs__(m,2)
lhs(1,m,i,j,k+2) = lhsp__(m,2)
lhs(2,m,i,j,k+2) = lhsm__(m,2)
endif
enddo
enddo
enddo
enddo
!DVM$ parallel (k,j,i) on rhs(*,i,j,k),ACROSS(rhs(0:0,0:0,0:0,0:2))
!DVM$& ,stage(stage_n)
do k = problem_size-3, 0, -1
do j = 1, ny2
do i = 1, nx2
rhs(1,i,j,k) = rhs(1,i,j,k) -
& lhs(0,4,i,j,k)*rhs(1,i,j,k+1) -
& lhs(0,5,i,j,k)*rhs(1,i,j,k+2)
rhs(2,i,j,k) = rhs(2,i,j,k) -
& lhs(0,4,i,j,k)*rhs(2,i,j,k+1) -
& lhs(0,5,i,j,k)*rhs(2,i,j,k+2)
rhs(3,i,j,k) = rhs(3,i,j,k) -
& lhs(0,4,i,j,k)*rhs(3,i,j,k+1) -
& lhs(0,5,i,j,k)*rhs(3,i,j,k+2)
rhs(4,i,j,k) = rhs(4,i,j,k) -
& lhs(1,4,i,j,k)*rhs(4,i,j,k+1) -
& lhs(1,5,i,j,k)*rhs(4,i,j,k+2)
rhs(5,i,j,k) = rhs(5,i,j,k) -
& lhs(2,4,i,j,k)*rhs(5,i,j,k+1) -
& lhs(2,5,i,j,k)*rhs(5,i,j,k+2)
enddo
enddo
enddo
!DVM$ parallel (k,j,i) on u(*,i,j,k)
!DVM$& ,private(t1,t2,t3,ac,xvel,yvel,zvel,btuz,ac2u,uzik1)
do k = 1, nz2
do j = 1, ny2
do i = 1, nx2
xvel = us(i,j,k)
yvel = vs(i,j,k)
zvel = ws(i,j,k)
ac = speed(i,j,k)
ac2u = ac*ac
uzik1 = u(1,i,j,k)
btuz = bt * uzik1
t1 = btuz/ac * (rhs(4,i,j,k) + rhs(5,i,j,k))
t2 = rhs(3,i,j,k) + t1
t3 = btuz * (rhs(4,i,j,k) - rhs(5,i,j,k))
u(1,i,j,k) = u(1,i,j,k) + t2
u(2,i,j,k) = u(2,i,j,k)-uzik1*rhs(2,i,j,k)+xvel*t2
u(3,i,j,k) = u(3,i,j,k)+uzik1*rhs(1,i,j,k)+yvel*t2
u(4,i,j,k) = u(4,i,j,k)+ zvel*t2 + t3
u(5,i,j,k) = u(5,i,j,k)+ uzik1*(-xvel*rhs(2,i,j,k) +
& yvel*rhs(1,i,j,k)) + qs(i,j,k)*t2 +
& c2iv*ac2u*t1 + zvel*t3
enddo
enddo
enddo
!DVM$ end region
if (timeron) call timer_stop(t_zsolve)
return
end

Some files were not shown because too many files have changed in this diff Show More