! *** generated by SAPFOR with version 2415 and build date: May 4 2025 14:48:40 ! *** Enabled options ***: ! *** maximum shadow width is 50 percent ! *** generated by SAPFOR !-------------------------------------------------------------------------! ! ! ! N A S P A R A L L E L B E N C H M A R K S 3.3.1 ! ! ! ! D V M H V E R S I O N ! ! ! ! S P ! ! ! !-------------------------------------------------------------------------! !-------------------------------------------------------------------------! !--------------------------------------------------------------------- ! ! Authors: ! Original: ! R. Van der Wijngaart ! W. Saphir ! H. Jin ! Optimize for DVMH: ! Kolganov A.S. !--------------------------------------------------------------------- !--------------------------------------------------------------------- subroutine sp include 'header_sp.h' integer :: i,niter,step,fstatus,n3 external timer_read_sp double precision :: mflops,t,tmax,timer_read_sp,trecs(t_last) logical :: verified character :: class character :: t_names(t_last)*8 !--------------------------------------------------------------------- ! Read input file (if it exists), else take ! defaults from parameters !--------------------------------------------------------------------- open (unit = 2,file = 'timer.flag',status = 'old',iostat = fstatus &) if (fstatus .eq. 0) then timeron = .TRUE. t_names(t_total) = 'total' t_names(t_rhsx) = 'rhsx' t_names(t_rhsy) = 'rhsy' t_names(t_rhsz) = 'rhsz' t_names(t_rhs) = 'rhs' t_names(t_xsolve) = 'xsolve' t_names(t_ysolve) = 'ysolve' t_names(t_zsolve) = 'zsolve' t_names(t_rdis1) = 'redist1' t_names(t_rdis2) = 'redist2' t_names(t_tzetar) = 'tzetar' t_names(t_ninvr) = 'ninvr' t_names(t_pinvr) = 'pinvr' t_names(t_txinvr) = 'txinvr' t_names(t_add) = 'add' close (unit = 2) else timeron = .FALSE. endif write (unit = *,fmt = 1000) open (unit = 2,file = 'inputsp.data',status = 'old',iostat = fstat &us) if (fstatus .eq. 0) then write (unit = *,fmt = 233) 233 format(' Reading from input file inputsp.data') read (unit = 2,fmt = *) niter read (unit = 2,fmt = *) dt read (unit = 2,fmt = *) grid_points(1),grid_points(2),grid_poin &ts(3) close (unit = 2) else write (unit = *,fmt = 234) niter = niter_default dt = dt_default grid_points(1) = problem_size grid_points(2) = problem_size grid_points(3) = problem_size endif 234 format(' No input file inputsp.data. Using compiled defaults') open (unit = 2,file = 'inputStage',status = 'old',iostat = fstatus &) if (fstatus .eq. 0) then read (unit = 2,fmt = *) stage_n close (unit = 2) else stage_n = 0 endif write (unit = *,fmt = *) 'stage = ',stage_n write (unit = *,fmt = 1001) problem_size,problem_size,problem_size write (unit = *,fmt = 1002) niter,dt write (unit = *,fmt = *) 1000 format(//, ' NAS Parallel Benchmarks (NPB3.3.1-DVMH)', & ' - SP Benchmark', /) 1001 format(' Size: ', i4, 'x', i4, 'x', i4) 1002 format(' Iterations: ', i4, ' dt: ', F11.7) 1003 format(' Number of available threads: ', i5) if (problem_size .gt. imax .or. problem_size .gt. jmax .or. proble &m_size .gt. kmax) then print *, (grid_points(i), i = 1,3) print *, ' Problem size too big for compiled array sizes' goto 999 endif nx2 = problem_size - 2 ny2 = problem_size - 2 nz2 = problem_size - 2 call set_constants_sp() call exact_rhs_sp() call initialize_sp() call adi_first_sp() call adi_first_sp() call initialize_sp() do i = 1,t_last call timer_clear_sp(i) enddo call timer_start_sp(1) ! DVM$ BARRIER do step = 1,niter if (mod (step,20) .eq. 0 .or. step .eq. 1) then write (unit = *,fmt = 200) step 200 format(' Time step ', i4) endif call adi() enddo call timer_stop_sp(1) tmax = timer_read_sp (1) call verify_sp(niter,class,verified) if (tmax .ne. 0.) then n3 = problem_size * problem_size * problem_size t = (problem_size + problem_size + problem_size) / 3.0 mflops = (881.174 * float (n3) - 4683.91 * t** 2 + 11484.5 * t &- 19272.4) * float (niter) / (tmax * 1000000.0d0) else mflops = 0.0 endif call print_results_sp('SP',class,problem_size,problem_size, &problem_si &ze,niter,tmax,mflops,' floating point',verified,npbversio &n,compiletime,cs1,cs2,cs3,cs4,cs5,cs6,'(none)') !--------------------------------------------------------------------- ! More timers !--------------------------------------------------------------------- if (.not.(timeron)) goto 999 do i = 1,t_last trecs(i) = timer_read_sp (i) enddo if (tmax .eq. 0.0) tmax = 1.0 write (unit = *,fmt = 800) 800 format(' SECTION Time (secs)') do i = 1,t_last write (unit = *,fmt = 810) t_names(i),trecs(i),trecs(i) * 100. &/ tmax if (i .eq. t_rhs) then t = trecs(t_rhsx) + trecs(t_rhsy) + trecs(t_rhsz) write (unit = *,fmt = 820) 'sub-rhs',t,t * 100. / tmax t = trecs(t_rhs) - t write (unit = *,fmt = 820) 'rest-rhs',t,t * 100. / tmax else if (i .eq. t_zsolve) then t = trecs(t_zsolve) - trecs(t_rdis1) - trecs(t_rdis2) write (unit = *,fmt = 820) 'sub-zsol',t,t * 100. / tmax else if (i .eq. t_rdis2) then t = trecs(t_rdis1) + trecs(t_rdis2) write (unit = *,fmt = 820) 'redist',t,t * 100. / tmax endif 810 format(2x,a8,':',f9.3,' (',f6.2,'%)') 820 format(' --> ',a8,':',f9.3,' (',f6.2,'%)') enddo 999 continue end !--------------------------------------------------------------------- !--------------------------------------------------------------------- subroutine adi_first_sp () !--------------------------------------------------------------------- !--------------------------------------------------------------------- call compute_rhs_sp(1) call x_solve_sp() call y_solve_sp() call z_solve_sp() return end !--------------------------------------------------------------------- !--------------------------------------------------------------------- subroutine adi_sp () ! DVM$ INTERVAL 1 !--------------------------------------------------------------------- !--------------------------------------------------------------------- call compute_rhs_sp(1) ! DVM$ INTERVAL 12 ! DVM$ END INTERVAL call x_solve_sp() ! DVM$ INTERVAL 13 ! DVM$ END INTERVAL call y_solve_sp() ! DVM$ INTERVAL 14 ! DVM$ END INTERVAL call z_solve_sp() ! DVM$ END INTERVAL return end