90 BL_PROFILE(
"Operator::Fsmooth()");
92 amrex::Box domain(m_geom[amrlev][mglev].growPeriodicDomain(1));
93 domain.convert(amrex::IntVect::TheNodeVector());
95 int ncomp = b.nComp();
99 amrex::MultiFab Ax(x.boxArray(), x.DistributionMap(), ncomp, nghost);
100 amrex::MultiFab Dx(x.boxArray(), x.DistributionMap(), ncomp, nghost);
101 amrex::MultiFab Rx(x.boxArray(), x.DistributionMap(), ncomp, nghost);
103 if (!m_diagonal_computed)
Util::Abort(
INFO,
"Operator::Diagonal() must be called before using Fsmooth");
107 for (
int ctr = 0; ctr < 2; ctr++)
109 Fapply(amrlev, mglev, Ax, x);
111 amrex::MultiFab::Copy(Dx, x, 0, 0, ncomp, nghost);
112 amrex::MultiFab::Multiply(Dx, *m_diag[amrlev][mglev], 0, 0, ncomp, nghost);
114 amrex::MultiFab::Copy(Rx, Ax, 0, 0, ncomp, nghost);
115 amrex::MultiFab::Subtract(Rx, Dx, 0, 0, ncomp, nghost);
117 for (MFIter mfi(x,
false); mfi.isValid(); ++mfi)
119 Box bx = mfi.grownnodaltilebox();
121 amrex::Array4<amrex::Real>
const& xfab = x.array(mfi);
122 amrex::Array4<const amrex::Real>
const& bfab = b.array(mfi);
123 amrex::Array4<const amrex::Real>
const& Rxfab = Rx.array(mfi);
124 amrex::Array4<const amrex::Real>
const& diagfab = (*m_diag[amrlev][mglev]).array(mfi);
127 for (
int n = 0; n < ncomp; n++)
129 amrex::ParallelFor(bx, [&] AMREX_GPU_DEVICE(
int i,
int j,
int k)
133 if (!domain.contains(i,j,k))
137 else if ( !bx.strictly_contains(i,j,k))
139 xfab(i, j, k, n) = 0.0;
144 xfab(i,j,k,n) = (1. - m_omega) * xfab(i,j,k, n) + m_omega * (bfab(i,j,k, n) - Rxfab(i,j,k, n)) / diagfab(i,j,k,n);
149 amrex::Geometry geom = m_geom[amrlev][mglev];
150 x.setMultiGhost(
true);
151 x.FillBoundary(geom.periodicity());
152 nodalSync(amrlev, mglev, x);
264 BL_PROFILE(
"Operator::restriction()");
266 applyBC(amrlev, cmglev - 1, fine, BCMode::Homogeneous, StateMode::Solution);
268 amrex::Box cdomain = m_geom[amrlev][cmglev].growPeriodicDomain(1);
269 cdomain = cdomain.convert(amrex::IntVect::TheNodeVector());
271 bool need_parallel_copy = !amrex::isMFIterSafe(crse, fine);
273 if (need_parallel_copy) {
274 const BoxArray& ba = amrex::coarsen(fine.boxArray(), 2);
275 cfine.define(ba, fine.DistributionMap(), fine.nComp(), fine.nGrow());
278 MultiFab* pcrse = (need_parallel_copy) ? &cfine : &crse;
280 for (MFIter mfi(*pcrse, amrex::TilingIfNotGPU()); mfi.isValid(); ++mfi)
282 const Box& bx = mfi.grownnodaltilebox(-1,1) & cdomain;
284 amrex::Array4<const amrex::Real>
const& fdata = fine.array(mfi);
285 amrex::Array4<amrex::Real>
const& cdata = pcrse->array(mfi);
287 const Dim3 lo = amrex::lbound(bx), hi = amrex::ubound(bx);
290 for (
int n = 0; n < crse.nComp(); n++)
294 amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(
int I,
int J,
int K) {
295 int i = 2 * I, j = 2 * J, k = 2 * K;
297 if ((I == lo.x || I == hi.x) &&
298 (J == lo.y || J == hi.y) &&
299 (K == lo.z || K == hi.z))
301 cdata(I, J, K, n) = fdata(i, j, k, n);
303 else if ((J == lo.y || J == hi.y) &&
304 (K == lo.z || K == hi.z))
306 cdata(I, J, K, n) = 0.25 * fdata(i - 1, j, k, n) + 0.5 * fdata(i, j, k, n) + 0.25 * fdata(i + 1, j, k, n);
308 else if ((K == lo.z || K == hi.z) &&
309 (I == lo.x || I == hi.x))
311 cdata(I, J, K, n) = 0.25 * fdata(i, j - 1, k, n) + 0.5 * fdata(i, j, k, n) + 0.25 * fdata(i, j + 1, k, n);
313 else if ((I == lo.x || I == hi.x) &&
314 (J == lo.y || J == hi.y))
316 cdata(I, J, K, n) = 0.25 * fdata(i, j, k - 1, n) + 0.5 * fdata(i, j, k, n) + 0.25 * fdata(i, j, k + 1, n);
318 else if (I == lo.x || I == hi.x)
321 (+fdata(i, j - 1, k - 1, n) + 2.0 * fdata(i, j, k - 1, n) + fdata(i, j + 1, k - 1, n)
322 + 2.0 * fdata(i, j - 1, k, n) + 4.0 * fdata(i, j, k, n) + 2.0 * fdata(i, j + 1, k, n)
323 + fdata(i, j - 1, k + 1, n) + 2.0 * fdata(i, j, k + 1, n) + fdata(i, j + 1, k + 1, n)) / 16.0;
325 else if (J == lo.y || J == hi.y)
328 (+fdata(i - 1, j, k - 1, n) + 2.0 * fdata(i - 1, j, k, n) + fdata(i - 1, j, k + 1, n)
329 + 2.0 * fdata(i, j, k - 1, n) + 4.0 * fdata(i, j, k, n) + 2.0 * fdata(i, j, k + 1, n)
330 + fdata(i + 1, j, k - 1, n) + 2.0 * fdata(i + 1, j, k, n) + fdata(i + 1, j, k + 1, n)) / 16.0;
332 else if (K == lo.z || K == hi.z)
335 (+fdata(i - 1, j - 1, k, n) + 2.0 * fdata(i, j - 1, k, n) + fdata(i + 1, j - 1, k, n)
336 + 2.0 * fdata(i - 1, j, k, n) + 4.0 * fdata(i, j, k, n) + 2.0 * fdata(i + 1, j, k, n)
337 + fdata(i - 1, j + 1, k, n) + 2.0 * fdata(i, j + 1, k, n) + fdata(i + 1, j + 1, k, n)) / 16.0;
341 (fdata(i - 1, j - 1, k - 1, n) + fdata(i - 1, j - 1, k + 1, n) + fdata(i - 1, j + 1, k - 1, n) + fdata(i - 1, j + 1, k + 1, n) +
342 fdata(i + 1, j - 1, k - 1, n) + fdata(i + 1, j - 1, k + 1, n) + fdata(i + 1, j + 1, k - 1, n) + fdata(i + 1, j + 1, k + 1, n)) / 64.0
344 (fdata(i, j - 1, k - 1, n) + fdata(i, j - 1, k + 1, n) + fdata(i, j + 1, k - 1, n) + fdata(i, j + 1, k + 1, n) +
345 fdata(i - 1, j, k - 1, n) + fdata(i + 1, j, k - 1, n) + fdata(i - 1, j, k + 1, n) + fdata(i + 1, j, k + 1, n) +
346 fdata(i - 1, j - 1, k, n) + fdata(i - 1, j + 1, k, n) + fdata(i + 1, j - 1, k, n) + fdata(i + 1, j + 1, k, n)) / 32.0
348 (fdata(i - 1, j, k, n) + fdata(i, j - 1, k, n) + fdata(i, j, k - 1, n) +
349 fdata(i + 1, j, k, n) + fdata(i, j + 1, k, n) + fdata(i, j, k + 1, n)) / 16.0
351 fdata(i, j, k, n) / 8.0;
356 if (need_parallel_copy) {
357 crse.ParallelCopy(cfine);
360 crse.setMultiGhost(
true);
361 crse.FillBoundary(Geom(amrlev,cmglev).periodicity());
362 nodalSync(amrlev, cmglev, crse);
367 BL_PROFILE(
"Operator::interpolation()");
368 amrex::Box fdomain = m_geom[amrlev][fmglev].growPeriodicDomain(2);
369 fdomain.convert(amrex::IntVect::TheNodeVector());
371 bool need_parallel_copy = !amrex::isMFIterSafe(crse, fine);
373 const MultiFab* cmf = &crse;
374 if (need_parallel_copy) {
375 const BoxArray& ba = amrex::coarsen(fine.boxArray(), 2);
376 cfine.define(ba, fine.DistributionMap(), crse.nComp(), crse.nGrow());
377 cfine.ParallelCopy(crse);
381 for (MFIter mfi(fine,
false); mfi.isValid(); ++mfi)
383 Box fine_bx = mfi.validbox() & fdomain;
385 const Box& course_bx = amrex::coarsen(fine_bx, 2);
386 const Box& tmpbx = amrex::refine(course_bx, 2);
388 tmpfab.resize(tmpbx, fine.nComp());
390 const amrex::FArrayBox& crsefab = (*cmf)[mfi];
392 amrex::Array4<const amrex::Real>
const& cdata = crsefab.const_array();
393 amrex::Array4<amrex::Real>
const& fdata = tmpfab.array();
395 for (
int n = 0; n < crse.nComp(); n++)
399 amrex::ParallelFor(fine_bx, [=] AMREX_GPU_DEVICE(
int i,
int j,
int k) {
401 int I = i / 2, J = j / 2, K = k / 2;
403 if (i % 2 == 0 && j % 2 == 0 && k % 2 == 0)
404 fdata(i, j, k, n) = cdata(I, J, K, n);
405 else if (j % 2 == 0 && k % 2 == 0)
406 fdata(i, j, k, n) = 0.5 * (cdata(I, J, K, n) + cdata(I + 1, J, K, n));
407 else if (k % 2 == 0 && i % 2 == 0)
408 fdata(i, j, k, n) = 0.5 * (cdata(I, J, K, n) + cdata(I, J + 1, K, n));
409 else if (i % 2 == 0 && j % 2 == 0)
410 fdata(i, j, k, n) = 0.5 * (cdata(I, J, K, n) + cdata(I, J, K + 1, n));
412 fdata(i, j, k, n) = 0.25 * (cdata(I, J, K, n) + cdata(I, J + 1, K, n) +
413 cdata(I, J, K + 1, n) + cdata(I, J + 1, K + 1, n));
415 fdata(i, j, k, n) = 0.25 * (cdata(I, J, K, n) + cdata(I, J, K + 1, n) +
416 cdata(I + 1, J, K, n) + cdata(I + 1, J, K + 1, n));
418 fdata(i, j, k, n) = 0.25 * (cdata(I, J, K, n) + cdata(I + 1, J, K, n) +
419 cdata(I, J + 1, K, n) + cdata(I + 1, J + 1, K, n));
421 fdata(i, j, k, n) = 0.125 * (cdata(I, J, K, n) +
422 cdata(I + 1, J, K, n) + cdata(I, J + 1, K, n) + cdata(I, J, K + 1, n) +
423 cdata(I, J + 1, K + 1, n) + cdata(I + 1, J, K + 1, n) + cdata(I + 1, J + 1, K, n) +
424 cdata(I + 1, J + 1, K + 1, n));
428 fine[mfi].plus(tmpfab, fine_bx, fine_bx, 0, 0, fine.nComp());
431 fine.setMultiGhost(
true);
432 fine.FillBoundary(Geom(amrlev,fmglev).periodicity());
433 nodalSync(amrlev, fmglev, fine);
527 MultiFab& res,
const MultiFab& ,
const MultiFab& ,
528 MultiFab& fine_res, MultiFab& ,
const MultiFab& )
const
530 BL_PROFILE(
"Operator::Elastic::reflux()");
532 int ncomp = AMREX_SPACEDIM;
534 amrex::Box cdomain(m_geom[crse_amrlev][0].growPeriodicDomain(2));
535 cdomain.convert(amrex::IntVect::TheNodeVector());
537 const Geometry& cgeom = m_geom[crse_amrlev][0];
539 const BoxArray& fba = fine_res.boxArray();
540 const DistributionMapping& fdm = fine_res.DistributionMap();
542 MultiFab fine_res_for_coarse(amrex::coarsen(fba, 2), fdm, ncomp, 2);
543 fine_res_for_coarse.ParallelCopy(res, 0, 0, ncomp, 0, 0, cgeom.periodicity());
545 applyBC(crse_amrlev + 1, 0, fine_res, BCMode::Inhomogeneous, StateMode::Solution);
549 const int coarse_fine_node = 1;
550 const int fine_fine_node = 2;
552 amrex::iMultiFab nodemask(amrex::coarsen(fba, 2), fdm, 1, 2);
553 nodemask.ParallelCopy(*m_nd_fine_mask[crse_amrlev], 0, 0, 1, 0, 0, cgeom.periodicity());
555 for (MFIter mfi(fine_res_for_coarse,
false); mfi.isValid(); ++mfi)
557 const Box& bx = mfi.grownnodaltilebox(-1,1) & cdomain;
559 amrex::Array4<const int>
const& nmask = nodemask.array(mfi);
562 amrex::Array4<amrex::Real>
const& cdata = fine_res_for_coarse.array(mfi);
563 amrex::Array4<const amrex::Real>
const& fdata = fine_res.array(mfi);
565 const Dim3 lo = amrex::lbound(bx), hi = amrex::ubound(bx);
567 for (
int n = 0; n < fine_res.nComp(); n++)
571 amrex::ParallelFor(bx, [=] AMREX_GPU_DEVICE(
int I,
int J,
int K) {
572 int i = I * 2, j = J * 2, k = K * 2;
574 if (nmask(I, J, K) == fine_fine_node || nmask(I, J, K) == coarse_fine_node)
576 if ((I == lo.x || I == hi.x) &&
577 (J == lo.y || J == hi.y) &&
578 (K == lo.z || K == hi.z))
579 cdata(I, J, K, n) = fdata(i, j, k, n);
580 else if ((J == lo.y || J == hi.y) &&
581 (K == lo.z || K == hi.z))
582 cdata(I, J, K, n) = 0.25 * fdata(i - 1, j, k, n) + 0.5 * fdata(i, j, k, n) + 0.25 * fdata(i + 1, j, k, n);
583 else if ((K == lo.z || K == hi.z) &&
584 (I == lo.x || I == hi.x))
585 cdata(I, J, K, n) = 0.25 * fdata(i, j - 1, k, n) + 0.5 * fdata(i, j, k, n) + 0.25 * fdata(i, j + 1, k, n);
586 else if ((I == lo.x || I == hi.x) &&
587 (J == lo.y || J == hi.y))
588 cdata(I, J, K, n) = 0.25 * fdata(i, j, k - 1, n) + 0.5 * fdata(i, j, k, n) + 0.25 * fdata(i, j, k + 1, n);
589 else if (I == lo.x || I == hi.x)
591 (+fdata(i, j - 1, k - 1, n) + 2.0 * fdata(i, j, k - 1, n) + fdata(i, j + 1, k - 1, n)
592 + 2.0 * fdata(i, j - 1, k, n) + 4.0 * fdata(i, j, k, n) + 2.0 * fdata(i, j + 1, k, n)
593 + fdata(i, j - 1, k + 1, n) + 2.0 * fdata(i, j, k + 1, n) + fdata(i, j + 1, k + 1, n)) / 16.0;
594 else if (J == lo.y || J == hi.y)
596 (+fdata(i - 1, j, k - 1, n) + 2.0 * fdata(i - 1, j, k, n) + fdata(i - 1, j, k + 1, n)
597 + 2.0 * fdata(i, j, k - 1, n) + 4.0 * fdata(i, j, k, n) + 2.0 * fdata(i, j, k + 1, n)
598 + fdata(i + 1, j, k - 1, n) + 2.0 * fdata(i + 1, j, k, n) + fdata(i + 1, j, k + 1, n)) / 16.0;
599 else if (K == lo.z || K == hi.z)
601 (+fdata(i - 1, j - 1, k, n) + 2.0 * fdata(i, j - 1, k, n) + fdata(i + 1, j - 1, k, n)
602 + 2.0 * fdata(i - 1, j, k, n) + 4.0 * fdata(i, j, k, n) + 2.0 * fdata(i + 1, j, k, n)
603 + fdata(i - 1, j + 1, k, n) + 2.0 * fdata(i, j + 1, k, n) + fdata(i + 1, j + 1, k, n)) / 16.0;
606 (fdata(i - 1, j - 1, k - 1, n) + fdata(i - 1, j - 1, k + 1, n) + fdata(i - 1, j + 1, k - 1, n) + fdata(i - 1, j + 1, k + 1, n) +
607 fdata(i + 1, j - 1, k - 1, n) + fdata(i + 1, j - 1, k + 1, n) + fdata(i + 1, j + 1, k - 1, n) + fdata(i + 1, j + 1, k + 1, n)) / 64.0
609 (fdata(i, j - 1, k - 1, n) + fdata(i, j - 1, k + 1, n) + fdata(i, j + 1, k - 1, n) + fdata(i, j + 1, k + 1, n) +
610 fdata(i - 1, j, k - 1, n) + fdata(i + 1, j, k - 1, n) + fdata(i - 1, j, k + 1, n) + fdata(i + 1, j, k + 1, n) +
611 fdata(i - 1, j - 1, k, n) + fdata(i - 1, j + 1, k, n) + fdata(i + 1, j - 1, k, n) + fdata(i + 1, j + 1, k, n)) / 32.0
613 (fdata(i - 1, j, k, n) + fdata(i, j - 1, k, n) + fdata(i, j, k - 1, n) +
614 fdata(i + 1, j, k, n) + fdata(i, j + 1, k, n) + fdata(i, j, k + 1, n)) / 16.0
616 fdata(i, j, k, n) / 8.0;
625 res.ParallelCopy(fine_res_for_coarse, 0, 0, ncomp, 0, 0, cgeom.periodicity());
628 res.setMultiGhost(
true);
629 res.FillBoundaryAndSync(Geom(crse_amrlev).periodicity());
630 nodalSync(crse_amrlev, 0, res);