% Encoding: UTF-8
@COMMENT{BibTeX export based on data in FAU CRIS: https://cris.fau.de/}
@COMMENT{For any questions please write to cris-support@fau.de}
@inproceedings{faucris.107108584,
author = {Bartuschat, Dominik and Gmeiner, Björn and Thönnes, Dominik and Kohl, Nils and Rüde, Ulrich and Drzisga, Daniel Peter and Huber, Markus and John, Lorenz and Waluga, Christian and Wohlmuth, B. I. and Bauer, Simon and Mohr, Marcus and Bunge, Hans-Peter},
booktitle = {SIAM Conference on Parallel Processing for Scientific Computing (SIAM PP 18)},
date = {2018-03-07/2018-03-10},
faupublication = {yes},
peerreviewed = {unknown},
title = {{A} {Finite} {Element} {Multigrid} {Framework} for
{Extreme}-{Scale} {Earth} {Mantle} {Convection} {Simulations}},
url = {https://www10.cs.fau.de/publications/talks/2018/Bartuschat{\_}Tokyo{\_}SIAMPP2018{\_}2018-03-10.pdf},
venue = {Tokyo},
year = {2018}
}
@inproceedings{faucris.206226591,
author = {Bartuschat, Dominik and Gmeiner, Björn and Thönnes, Dominik and Kohl, Nils and Rüde, Ulrich and Drzisga, Daniel Peter and Huber, Markus and John, Lorenz and Waluga, Christian and Wohlmuth, B. I. and Bauer, Simon and Mohr, Marcus and Bunge, Hans-Peter},
booktitle = {SIAM Conference on Parallel Processing for Scientific Computing (SIAM PP 18)},
date = {2018-03-07/2018-03-10},
faupublication = {yes},
peerreviewed = {unknown},
title = {{A} {Finite} {Element} {Multigrid} {Framework} for
{Extreme}-{Scale} {Earth} {Mantle} {Convection} {Simulations}},
url = {https://www10.cs.fau.de/publications/talks/2018/Thoennes{\_}Tokio{\_}SIAMPP18{\_}2018-03-09.pdf},
venue = {Tokyo},
year = {2018}
}
@incollection{faucris.106130464,
abstract = {Many problems in computational science and engineering require the numerical solution of partial differential equations and thus the solution of large, sparse linear systems of equations. Multigrid is known to be one of the most efficient methods for this purpose, and therefore many software packages exist that are also able to run on large HPC clusters. However, the concrete multigrid algorithm and its implementation depend strongly on the underlying problem and hardware. Therefore, changes in the code or many different variants are necessary to cover all relevant cases. We try to generalize the data structures and multigrid components required to solve elliptic PDEs on Hierarchical Hybrid Grids (HHG), which are a compromise between structured and unstructured grids. Our goal is the automatic generation of the HHG data structures for arbitrary primitives. As a first step, we implemented a generic 2D prototype including a multigrid solver for the two-dimensional Poisson problem. We show that the multigrid algorithm is highly scalable up to more than 450,000 cores.},
address = {Berlin},
author = {Kuckuk, Sebastian and Gmeiner, Björn and Köstler, Harald and Rüde, Ulrich},
booktitle = {Parallel Computing: Accelerating Computational Science and Engineering (CSE)},
doi = {10.3233/978-1-61499-381-0-813},
faupublication = {yes},
isbn = {978-1-61499-380-3},
keywords = {Hierarchical Hybrid Grids; Juqueen; MPI; parallel multigrid},
note = {UnivIS-Import:2015-04-20:Pub.2014.tech.IMMD.lsinfs.agener},
pages = {813-822},
peerreviewed = {unknown},
publisher = {IOS Press},
series = {Advances in Parallel Computing},
title = {{A} {Generic} {Prototype} to {Benchmark} {Algorithms} and {Data} {Structures}},
url = {http://ebooks.iospress.nl/volumearticle/35957},
volume = {25},
year = {2014}
}
@article{faucris.113550404,
author = {Köstler, Harald and Gmeiner, Björn},
doi = {10.1007/s13218-013-0263-2},
faupublication = {yes},
journal = {Künstliche Intelligenz},
note = {UnivIS-Import:2015-03-09:Pub.2013.tech.IMMD.lsinfs.amulti{\_}2},
pages = {221-223},
peerreviewed = {Yes},
title = {{A} {Multi}-objective {Genetic} {Algorithm} for {Build} {Order} {Optimization} in {StarCraft} {II}},
volume = {27},
year = {2013}
}
@misc{faucris.112679864,
author = {Gmeiner, Björn and Gradl, Tobias and Köstler, Harald and Rüde, Ulrich},
faupublication = {yes},
note = {UnivIS-Import:2016-06-30:Pub.2011.tech.IMMD.lsinfs.analys},
peerreviewed = {automatic},
title = {{Analysis} of a flat {Highly} {Parallel} {Geometric} {Multigrid} {Algorithm} for {Hierarchical} {Hybrid} {Grids}},
url = {https://www10.cs.fau.de/publications/reports/TechRep{\_}2011-03.pdf},
year = {2011}
}
@misc{faucris.117275884,
abstract = {This article presents a systematic quantitative performance analysis for large finite element computations on extreme scale computing systems. Three parallel iterative solvers for the Stokes system, discretized by low order tetrahedral elements, are compared with respect to their numerical efficiency and their scalability running on up to 786 432 parallel threads. A genuine multigrid method for the saddle point system using an Uzawa-type smoother provides the best overall performance with respect to memory consumption and time-to-solution. The largest system solved on a Blue Gene/Q system has more than ten trillion (1.1 · $10^{13}$) unknowns and requires about 13 minutes compute time. Despite the matrix free and highly optimized implementation, the memory requirement for the solution vector and the auxiliary vectors is about 200 TByte. Brandt’s notion of “textbook multigrid efficiency” is employed to study the algorithmic performance of iterative solvers. A recent extension of this paradigm to “parallel textbook multigrid efficiency” makes it possible to assess also the efficiency of parallel iterative solvers for a given hardware architecture in absolute terms. The efficiency of the method is demonstrated for simulating incompressible fluid flow in a pipe filled with spherical obstacles.},
author = {Gmeiner, Björn and Huber, Markus and John, Lorenz and Rüde, Ulrich and Wohlmuth, B. I.},
faupublication = {yes},
keywords = {Stokes system; stabilized finite element methods; Uzawa multigrid; hierarchical hybrid grids; scalability; parallel textbook multigrid efficiency; flow around sphere pack},
peerreviewed = {automatic},
title = {{A} quantitative performance analysis for {Stokes} solvers at the extreme scale},
year = {2015}
}
@article{faucris.108219144,
author = {Gmeiner, Björn and Huber, Markus and John, Lorenz and Rüde, Ulrich and Wohlmuth, B. I.},
doi = {10.1016/j.jocs.2016.06.006},
faupublication = {yes},
journal = {Journal of Computational Science},
keywords = {Flow around sphere pack; Hierarchical hybrid grids; Parallel textbook multigrid efficiency; Scalability; Stokes system; Uzawa smoother},
note = {UnivIS-Import:2017-01-09:Pub.2016.tech.IMMD.lsinfs.aquant},
pages = {509-521},
peerreviewed = {unknown},
title = {{A} quantitative performance study for {Stokes} solvers at the extreme scale},
url = {http://www.sciencedirect.com/science/article/pii/S1877750316301077},
volume = {17},
year = {2016}
}
@phdthesis{faucris.201216044,
author = {Gmeiner, Björn},
faupublication = {yes},
peerreviewed = {automatic},
school = {Friedrich-Alexander-Universität Erlangen-Nürnberg},
title = {{Design} and {Analysis} of {Hierarchical} {Hybrid} {Multigrid} {Methods} for {Peta}-{Scale} {Systems} and {Beyond}},
url = {https://www10.cs.fau.de/publications/dissertations/Diss{\_}2013-Gmeiner.pdf},
year = {2013}
}
@inproceedings{faucris.112847944,
address = {Oberwolfach},
author = {Wohlmuth, B. I. and Gmeiner, Björn and Stengel, H. and Rüde, Ulrich and Waluga, Christian},
booktitle = {MFO Report Nr. 24},
doi = {10.4171/OWR/2014/24},
faupublication = {no},
note = {UnivIS-Import:2016-06-01:Pub.2014.tech.IMMD.lsinfs.dualan},
pages = {1356-1359},
peerreviewed = {unknown},
publisher = {EMS Publishing House},
title = {{Dual} and {Hybrid} {Hierarchical} {Grids} for {Fast} {Geophysical} {Flow} {Simulations}},
venue = {Oberwolfach},
volume = {24},
year = {2014}
}
@inproceedings{faucris.122617924,
abstract = {Conductor tracks exhibit a frequency-dependent attenuation of electromagnetic waves, since with increasing frequency the current flow is displaced to the near-surface region due to the skin effect. Therefore, the effective length of the conductor is increased by the surface roughness, while its effective cross-section is decreased by current displacement, both leading to higher metallization loss. In this paper, surface topographies of typical conductor materials were recorded by confocal microscopy and rebuilt as 3D CAD models. Subsequent electromagnetic simulations reveal the influence of roughness on the high-frequency characteristics of physical vapor deposited, thick-film and photochemically etched microstrips.},
author = {Talai, Armin and Gmeiner, Björn and Wegener, Carsten Moritz and Roosen, Andreas and Kölpin, Alexander and Steinhäußer, Frank and Deisinger, Ulrike and Bittner, Achim and Schmid, Ulrich and Weigel, Robert},
booktitle = {Electromagnetics in Advanced Applications (ICEAA), 2014 International Conference on},
date = {2014-08-03/2014-08-08},
doi = {10.1109/ICEAA.2014.6903887},
faupublication = {yes},
note = {UnivIS-Import:2015-04-17:Pub.2014.tech.IMMD.lsinfs.electr},
pages = {415-418},
peerreviewed = {Yes},
publisher = {Institute of Electrical and Electronics Engineers Inc.},
title = {{Electromagnetic} analysis of conductor track surface roughnesses from 1 {GHz} to 110 {GHz}},
url = {http://ieeexplore.ieee.org/xpls/abs{\_}all.jsp?arnumber=6903887},
venue = {Palm Beach},
year = {2014}
}
@inproceedings{faucris.122512544,
abstract = {Recent investigations demonstrated that porosification of LTCC enables a local modification of material properties, in particular of the effective permittivity. These studies revealed that the electric field strength is highest at the lower conductor track edges on the porosified substrate. The local field strength peaks substantially influence the effectiveness of the permittivity reduction due to near-surface material abrasions. In this paper, the influences of these randomly fringed boundaries on electromagnetic properties are investigated for different degrees of fraying. The obtained results are compared to measured screen print edges by scanning electron microscope image analysis of co-fired and post-fired gold conductors, and will contribute to reliable circuit design on porous LTCC in the future. © 2013 IEEE.},
author = {Talai, Armin and Steinhäußer, Frank and Gmeiner, Björn and Bittner, Achim and Rüde, Ulrich and Schmid, Ulrich and Weigel, Robert and Kölpin, Alexander},
booktitle = {Electromagnetics in Advanced Applications (ICEAA), 2013 International Conference on},
date = {2013-09-09/2013-09-13},
doi = {10.1109/ICEAA.2013.6632433},
faupublication = {yes},
keywords = {cofired; Conductor edge; field simulation; fringed microstrip; inhomogeneous substrate; LTCC; porosification; reduction of permittivity; structural modeling},
note = {UnivIS-Import:2015-04-16:Pub.2013.tech.IMMD.lsinfs.electr{\_}6},
pages = {1189-1192},
peerreviewed = {Yes},
title = {{Electromagnetic} {Analysis} of {Fringed} {Microstrip} {Lines} on {Porosified} {LTCC}},
url = {http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=6632433&isnumber=6632166},
venue = {Torino},
year = {2013}
}
@inproceedings{faucris.121782364,
abstract = {Recent investigations demonstrated that porosification offers the possibility of high quality antenna integration on LTCC by local reduction of the relative permittivity. In order to take advantage of this technology, further research for the deposition of conductors on porosified areas has to be done. Since the near-surface material removal is strong, conductors are expected to penetrate substantially into the porosified substrate. In this paper a new, randomized structural modeling technique of porosified LTCC is presented, which is based on scanning electron microscope imaging. Finite 3D field simulations, performed with this model, show the electromagnetic effects of different vertical microstrip positions on the effective permittivity and the corresponding scattering parameters. © 2013 European Microwave Association.},
address = {Nürnberg},
author = {Talai, Armin and Weigel, Robert and Kölpin, Alexander and Gmeiner, Björn and Rüde, Ulrich and Steinhäußer, Frank and Bittner, Achim and Schmid, Ulrich},
booktitle = {Microwave Conference (EUMC), 2013 European},
date = {2013-10-07/2013-10-10},
faupublication = {yes},
keywords = {Field simulation; inhomogeneous substrate; LTCC; porosification; reduction of permittivity; structural modeling},
note = {UnivIS-Import:2015-04-16:Pub.2013.tech.IMMD.lsinfs.electr{\_}9},
pages = {440-443},
peerreviewed = {Yes},
publisher = {IEEE},
title = {{Electromagnetic} field analysis with advanced structural modeling of microstrips on porosified {LTCC}},
venue = {Nürnberg},
year = {2013}
}
@article{faucris.122954304,
abstract = {A variety of geologic observations point to fast upper mantle flow that may exceed plate tectonic velocities by an order of magnitude. At the same time there is mounting evidence from seismology for flow-like structures in the upper 100–200 km of the mantle. Here we present a set of geodynamic simulations to link these observations. In a synthetic setting, we include asthenospheric channels of varying thickness, with an extreme case of 100 km, and a significant viscosity contrast of up to 4 orders of magnitude relative to the deeper mantle. Using our new global high-resolution code TERRA-NEO, we obtain an increase in velocity by a factor of 10 between a 1000 km thick and the very thin channel, translating into velocities of ∼ 20 cm/a within the narrow asthenosphere. We further present and verify a simple Poiseuille flow model, predicting that the upper mantle velocity scales with the inverse of the asthenosphere thickness.},
author = {Weißmüller, Jens and Gmeiner, Björn and Ghelichkhan, Siavash and Huber, Markus and John, Lorenz and Wohlmuth, B. I. and Rüde, Ulrich and Bunge, Hans-Peter},
doi = {10.1002/2015GL063727},
faupublication = {yes},
journal = {Geophysical Research Letters},
keywords = {rheology;asthenosphere;mantle convection;high-performance computing},
pages = {7429-7435},
peerreviewed = {Yes},
title = {{Fast} asthenosphere motion in high-resolution global mantle flow models},
url = {http://onlinelibrary.wiley.com/wol1/doi/10.1002/2015GL063727/full},
volume = {42},
year = {2015}
}
@inproceedings{faucris.106413824,
abstract = {In this article we consider the application of the Hierarchical Hybrid Grid Framework (HHG) to the geodynamical problem of simulating mantle convection. We describe the generation of a refined icosahedral grid and a further subdivision of the resulting prisms into tetrahedral elements. Based on this mesh, we present performance results for HHG and compare these to the finite element program TERRA, a well-known mantle convection code that uses a matrix-free representation of the stiffness matrix. In our analysis we consider the most time-consuming part of TERRA's solution algorithm and evaluate it in a strong scaling setup. Finally we present strong and weak scaling results for HHG to verify its parallel concepts, algorithms and grid flexibility on Jugene. © 2012 IEEE.},
author = {Gmeiner, Björn and Mohr, Marcus and Rüde, Ulrich},
booktitle = {Proceedings of the 11th International Symposium on Parallel and Distributed Computing},
date = {2012-06-25/2012-06-29},
doi = {10.1109/ISPDC.2012.49},
editor = {FAU Erlangen},
faupublication = {yes},
isbn = {978-1-4673-2599-8},
keywords = {HHG; Jugene; mantle-convection; TERRA},
note = {UnivIS-Import:2015-04-16:Pub.2012.tech.IMMD.lsinfs.hierar{\_}5},
pages = {309-314},
peerreviewed = {Yes},
title = {{Hierarchical} {Hybrid} {Grids} for {Mantle} {Convection}: {A} {First} {Study}},
url = {http://www.computer.org/csdl/proceedings/ispdc/2012/4805/00/4805a309-abs.html},
venue = {München},
year = {2012}
}
@inproceedings{faucris.118255104,
abstract = {Generating and analyzing the dynamics of molecular systems is a true challenge to molecular simulation. It includes processes that happen on the femtosecond scale, such as photoinduced nonadiabatic (bio)chemical reactions, and touches the range of seconds, being e.g. relevant in biophysics to cellular processes or in material sciences to crack propagation. Thus, many orders of magnitude in time need to be covered either concurrently or hierarchically. In the latest edition of this series of Winter Schools in 2009 we addressed the topic of Multiscale Simulation Methods in Molecular Sciences with a strong focus on methods which cover diversities of length scales. The key issue of the present school is to dwell on hierarchical methods for dynamics, having primarily in mind systems described in terms of many atoms or molecules. One extreme end of relevant time scales is found in the sub-femtosecond range, yet such processes influence dynamical events which are orders of magnitude slower. Examples for such phenomena might be photo-induced switching of individual molecules, which results in large-amplitude relaxation in liquids or photodriven phase transitions of liquid crystals, phenomena for which nonadiabatic quantum dynamics methods were developed. The other end of relevant time scales is found in a broad range of microseconds, seconds or beyond, and governs e.g. non-equilibrium dynamics in polymer flows or blood cells in complex geometries like microvessels. Special mesoscopic techniques are applied for these time- and length-scales to couple the atomistic nature of particles to the hydrodynamics of flows....},
address = {Jülich},
author = {Gmeiner, Björn and Gradl, Tobias and Köstler, Harald and Rüde, Ulrich},
booktitle = {NIC Symposium 2012 - Proceedings},
date = {2012-03-05/2012-03-09},
faupublication = {yes},
isbn = {978-3-89336-758-0},
note = {UnivIS-Import:2015-04-16:Pub.2012.tech.IMMD.lsinfs.highly{\_}0},
pages = {323-330},
peerreviewed = {unknown},
publisher = {FZ Jülich},
series = {NIC Series},
title = {{Highly} {Parallel} {Geometric} {Multigrid} {Algorithm} for {Hierarchical} {Hybrid} {Grids}},
url = {http://hdl.handle.net/2128/4538},
venue = {Jülich},
volume = {45},
year = {2012}
}
@incollection{faucris.123690204,
abstract = {Even on modern supercomputer architectures, Earth mantle simulations are so compute intensive that they are considered grand challenge applications. The dominating roadblocks in this branch of Geophysics are model complexity and uncertainty in parameters and data, e.g., rheology and seismically imaged mantle heterogeneity, as well as the enormous space and time scales that must be resolved in the computational models. This article reports on a massively parallel all-at-once multigrid solver for the Stokes system as it arises in mantle convection models. The solver employs the hierarchical hybrid grids framework and demonstrates that a system with coupled velocity components and with more than a trillion (1.7 ⋅ $10^{12}$) degrees of freedom can be solved in about 1,000 s using 40,960 compute cores of JUQUEEN. The simulation framework is used to investigate the influence of asthenosphere thickness and viscosity on upper mantle velocities in a static scenario. Additionally, results for a time-dependent simulation with a time-variable temperature-dependent viscosity model are presented.},
address = {Berlin, Heidelberg, New York},
author = {Bauer, Simon and Bunge, Hans-Peter and Drzisga, Daniel Peter and Gmeiner, Björn and Huber, Markus and John, Lorenz and Mohr, Marcus and Rüde, Ulrich and Stengel, Holger and Waluga, Christian and Weißmüller, Jens and Wellein, Gerhard and Wittmann, Markus and Wohlmuth, B. I.},
booktitle = {Software for Exascale Computing - SPPEXA 2013-2015},
doi = {10.1007/978-3-319-40528-5{\_}10},
editor = {Bungartz, H. and Neumann, P. and Nagel, E.},
faupublication = {yes},
isbn = {978-3-319-40526-1},
pages = {211-235},
peerreviewed = {unknown},
publisher = {Springer},
series = {Lecture Notes in Computational Science and Engineering},
title = {{Hybrid} {Parallel} {Multigrid} {Methods} for {Geodynamical} {Simulations}},
url = {http://link.springer.com/chapter/10.1007%2F978-3-319-40528-5{\_}10},
volume = {113},
year = {2016}
}
@article{faucris.108338164,
abstract = {In this work, we discuss a family of finite element discretizations for the incompressible Stokes problem using continuous pressure approximations on simplicial meshes. We show that after a simple and cheap correction, the mass-fluxes obtained by the considered schemes preserve local conservation on dual cells without reducing the convergence order. This allows the direct coupling to vertex-centered finite volume discretizations of transport equations. Further, we can postprocess the mass fluxes independently for each dual box to obtain an elementwise conservative velocity approximation of optimal order that can be used in cell-centered finite volume or discontinuous Galerkin schemes. Numerical examples for stable and stabilized methods are given to support our theoretical findings. Moreover, we demonstrate the coupling to vertex- and cell-centered finite volume methods for advective transport.},
author = {Gmeiner, Björn and Waluga, Christian and Wohlmuth, B. I.},
doi = {10.1137/140959675},
faupublication = {yes},
journal = {SIAM Journal on Numerical Analysis},
keywords = {Local mass conservation; Mixed finite elements; Stabilization; Stokes equations},
note = {UnivIS-Import:2016-06-01:Pub.2014.tech.IMMD.lsinfs.localm},
pages = {2931-2956},
peerreviewed = {Yes},
title = {{Local} mass-corrections for continuous pressure approximations of incompressible flow},
url = {http://www.terraneo.fau.de/docs/gmeiner-waluga-wohlmuth-2014-mass-conservation.pdf},
volume = {52},
year = {2014}
}
@inproceedings{faucris.214091142,
author = {Weißmüller, Jens and Gmeiner, Björn and Mohr, Marcus and Waluga, Christian and Wohlmuth, B. I. and Rüde, Ulrich and Bunge, Hans-Peter},
booktitle = {EGU General Assembly Conference Abstracts},
faupublication = {yes},
pages = {10153},
peerreviewed = {unknown},
series = {EGU General Assembly Conference Abstracts},
title = {{Mantle} convection on modern supercomputers},
volume = {17},
year = {2015}
}
@inproceedings{faucris.122071444,
abstract = {In many applications, physical models consisting of a Stokes-type equation that is coupled to a convection-dominated transport equation play an important role, e.g., in mantle-convection or ice-sheet dynamics. In the iterative treatment of such problems the computational cost is usually dominated by the solution procedure for the Stokes part. Hence, we focus on massively scalable and fast multigrid solvers for the arising saddle point problem. To gain deeper insight into the performance characteristics, we evaluate the multigrid efficiency systematically and address the methodology of algorithmic resilience. Three methods based on the HHG software framework will be presented and are shown to solve FE systems with half a billion unknowns even on standard workstations. On petascale systems they furthermore exhibit excellent scalability. This together with the optimised performance on each node leads to superior supercomputing efficiency. Indefinite systems with up to ten trillion ($10^{13}$) unknowns can be solved in less than 13 minutes compute time.},
author = {Gmeiner, Björn and Huber, Markus and John, Lorenz and Rüde, Ulrich and Waluga, Christian and Wohlmuth, B. I.},
booktitle = {NIC Symposium 2016 Proceedings},
date = {2016-02-11/2016-02-12},
editor = {Binder, K. and Müller, M. and Kremer, M. and Schnurpfeil, A.},
faupublication = {yes},
isbn = {978-3-95806-109-5},
pages = {333-341},
peerreviewed = {unknown},
title = {{Massively} {Parallel} {Large} {Scale} {Stokes} {Flow} {Simulation}},
url = {http://juser.fz-juelich.de/record/280633/files/NIC{\_}Series{\_}48.pdf},
venue = {John von Neumann Institute for Computing (NIC), Jülich},
year = {2016}
}
@article{faucris.114286084,
author = {Gmeiner, Björn and Gradl, Tobias and Gaspar, Francisco and Rüde, Ulrich},
doi = {10.1016/j.camwa.2012.12.006},
faupublication = {yes},
journal = {Computers & Mathematics with Applications},
note = {UnivIS-Import:2015-03-09:Pub.2012.tech.IMMD.lsinfs.optimi{\_}5},
pages = {694-711},
peerreviewed = {unknown},
title = {{Optimization} of the multigrid-convergence rate on semi-structured meshes by local {Fourier} analysis},
url = {http://www.sciencedirect.com/science/article/pii/S089812211200702X},
volume = {65},
year = {2012}
}
@inproceedings{faucris.113641264,
abstract = {This paper presents modeling, forward simulation, and optimization of different opening strategies in the real-time strategy game Starcraft 2. We implemented an event-driven simulator in C# with a graphical user interface. In order to find optimal build orders, we employ a modified version of the multi-objective genetic algorithm NSGA II. Procedural constraints, e.g. given by the tech-tree or other game mechanisms, are implicitly encoded into the chromosomes. Additionally, the size of the active part of the chromosomes is not known a priori, and the objective values have a small diversity. The model was tested on different Tech-Pushes and Rushes, and validated with empirical data of expert Starcraft 2 players. © Springer-Verlag London 2012.},
address = {London},
author = {Gmeiner, Björn and Donnert, Gerald and Köstler, Harald},
booktitle = {Research and Development in Intelligent Systems XXIX},
date = {2012-12-11/2012-12-13},
doi = {10.1007/978-1-4471-4739-8{\_}28},
faupublication = {yes},
isbn = {978-1-4471-4739-8},
note = {UnivIS-Import:2015-04-16:Pub.2012.tech.IMMD.lsinfs.optimi{\_}7},
pages = {361-374},
publisher = {Springer},
title = {{Optimizing} {Opening} {Strategies} in a {Real}-time {Strategy} {Game} by a {Multi}-objective {Genetic} {Algorithm}},
url = {http://www.springer.com/computer/ai/book/978-1-4471-4738-1},
venue = {Cambridge, England},
year = {2012}
}
@article{faucris.113082904,
abstract = {This article studies the performance and scalability of a geometric multigrid solver implemented within the hierarchical hybrid grids (HHG) software package on current high performance computing clusters up to nearly 300,000 cores. HHG is based on unstructured tetrahedral finite elements that are regularly refined to obtain a block-structured computational grid. One challenge is the parallel mesh generation from an unstructured input grid that roughly approximates a human head within a 3D magnetic resonance imaging data set. This grid is then regularly refined to create the HHG grid hierarchy. As test platforms, a BlueGene/P cluster located at Jülich supercomputing center and an Intel Xeon 5650 cluster located at the local computing center in Erlangen are chosen. To estimate the quality of our implementation and to predict runtime for the multigrid solver, a detailed performance and communication model is developed and used to evaluate the measured single node performance, as well as weak and strong scaling experiments on both clusters. Thus, for a given problem size, one can predict the number of compute nodes that minimize the overall runtime of the multigrid solver. Overall, HHG scales up to the full machines, where the biggest linear system solved on Jugene had more than one trillion unknowns. Copyright © 2012 John Wiley & Sons, Ltd.},
author = {Gmeiner, Björn and Köstler, Harald and Stürmer, Markus and Rüde, Ulrich},
doi = {10.1002/cpe.2968},
faupublication = {yes},
journal = {Concurrency and Computation-Practice & Experience},
keywords = {finite elements; HHG; HPC cluster; Jugene; multigrid; performance model},
note = {UnivIS-Import:2015-03-09:Pub.2012.tech.IMMD.lsinfs.parall{\_}6},
pages = {217-240},
peerreviewed = {Yes},
title = {{Parallel} multigrid on hierarchical hybrid grids: a performance study on current high performance computing clusters},
url = {http://onlinelibrary.wiley.com/doi/10.1002/cpe.2968/pdf},
volume = {26},
year = {2012}
}
@article{faucris.117707524,
abstract = {This article studies the performance and scalability of a geometric multigrid solver implemented within the hierarchical hybrid grids (HHG) software package on current high performance computing clusters up to nearly 300,000 cores. HHG is based on unstructured tetrahedral finite elements that are regularly refined to obtain a block-structured computational grid. One challenge is the parallel mesh generation from an unstructured input grid that roughly approximates a human head within a 3D magnetic resonance imaging data set. This grid is then regularly refined to create the HHG grid hierarchy. As test platforms, a BlueGene/P cluster located at Jülich supercomputing center and an Intel Xeon 5650 cluster located at the local computing center in Erlangen are chosen. To estimate the quality of our implementation and to predict runtime for the multigrid solver, a detailed performance and communication model is developed and used to evaluate the measured single node performance, as well as weak and strong scaling experiments on both clusters. Thus, for a given problem size, one can predict the number of compute nodes that minimize the overall runtime of the multigrid solver. Overall, HHG scales up to the full machines, where the biggest linear system solved on Jugene had more than one trillion unknowns. Copyright © 2012 John Wiley & Sons, Ltd.},
author = {Gmeiner, Björn and Köstler, Harald and Stürmer, Markus and Rüde, Ulrich},
doi = {10.1002/cpe.2968},
faupublication = {yes},
journal = {Concurrency and Computation-Practice & Experience},
keywords = {finite elements; HHG; HPC cluster; Jugene; multigrid; performance model},
note = {UnivIS-Import:2015-03-09:Pub.2014.tech.IMMD.lsinfs.parall{\_}4},
pages = {217-240},
peerreviewed = {Yes},
title = {{Parallel} multigrid on hierarchical hybrid grids: a performance {Study} on current high performance computing clusters},
url = {http://onlinelibrary.wiley.com/doi/10.1002/cpe.2968/pdf},
volume = {26},
year = {2014}
}
@article{faucris.122301784,
abstract = {This article studies the performance and scalability of a geometric multigrid solver implemented within the hierarchical hybrid grids (HHG) software package on current high performance computing clusters up to nearly 300,000 cores. HHG is based on unstructured tetrahedral finite elements that are regularly refined to obtain a block-structured computational grid. One challenge is the parallel mesh generation from an unstructured input grid that roughly approximates a human head within a 3D magnetic resonance imaging data set. This grid is then regularly refined to create the HHG grid hierarchy. As test platforms, a BlueGene/P cluster located at Jülich supercomputing center and an Intel Xeon 5650 cluster located at the local computing center in Erlangen are chosen. To estimate the quality of our implementation and to predict runtime for the multigrid solver, a detailed performance and communication model is developed and used to evaluate the measured single node performance, as well as weak and strong scaling experiments on both clusters. Thus, for a given problem size, one can predict the number of compute nodes that minimize the overall runtime of the multigrid solver. Overall, HHG scales up to the full machines, where the biggest linear system solved on Jugene had more than one trillion unknowns.},
author = {Gmeiner, Björn and Köstler, Harald and Stürmer, Markus and Rüde, Ulrich},
doi = {10.1002/cpe.3557},
faupublication = {yes},
journal = {Concurrency and Computation-Practice & Experience},
keywords = {HHG; multigrid; finite elements; Jugene; HPC cluster; performance model},
pages = {2369-2369},
peerreviewed = {Yes},
title = {{Parallel} multigrid on hierarchical hybrid grids: a performance study on current high performance computing clusters (vol 26, pg 217, 2014)},
url = {http://onlinelibrary.wiley.com/doi/10.1002/cpe.2968/pdf},
volume = {27},
year = {2015}
}
@article{faucris.115286204,
abstract = {In many applications involving incompressible fluid flow, the Stokes system plays an important role. Complex flow problems may require extremely fine resolutions, easily resulting in saddle-point problems with more than a trillion ($10^{12}$) unknowns. Even on the most advanced supercomputers, the fast solution of such systems of equations is a highly nontrivial and challenging task. In this work we consider a realization of an iterative saddle-point solver which is based mathematically on the Schur-complement formulation of the pressure and algorithmically on the abstract concept of hierarchical hybrid grids. The design of our fast multigrid solver is guided by an innovative performance analysis for the computational kernels in combination with a quantification of the communication overhead. Excellent node performance and good scalability to almost a million parallel threads are demonstrated on different characteristic types of modern supercomputers.},
author = {Gmeiner, Björn and Rüde, Ulrich and Stengel, Holger and Waluga, Christian and Wohlmuth, B. I.},
doi = {10.1137/130941353},
faupublication = {yes},
journal = {SIAM Journal on Scientific Computing},
keywords = {hierarchical hybrid grids; multigrid methods; parallel solver; node performance; Stokes system},
note = {UnivIS-Import:2015-04-14:Pub.2015.tech.IMMD.lsinfs.perfor},
pages = {C143-C168},
peerreviewed = {Yes},
title = {{Performance} and {Scalability} of {Hierarchical} {Hybrid} {Multigrid} {Solvers} for {Stokes} {Systems}},
url = {http://epubs.siam.org/doi/pdf/10.1137/130941353},
volume = {37},
year = {2015}
}
@incollection{faucris.107999364,
abstract = {In this article we present a performance study of our finite element package Hierarchical Hybrid Grids (HHG) on current European supercomputers. HHG is designed to close the gap between the flexibility of finite elements and the efficiency of geometric multigrid by using a compromise between structured and unstructured grids. A coarse input finite element mesh is refined in a structured way, resulting in semi-structured meshes. Within this article we compare and analyze the efficiencies of the stencil-based code on those clusters. © 2014 Springer-Verlag.},
address = {Berlin, Heidelberg, New York},
author = {Gmeiner, Björn and Rüde, Ulrich and Wasniewski, J.},
booktitle = {Large-Scale Scientific Computing},
doi = {10.1007/978-3-662-43880-0},
faupublication = {yes},
isbn = {978-3-662-43879-4},
keywords = {HHG; Parallel multigrid; Performance analysis},
note = {UnivIS-Import:2015-04-20:Pub.2014.tech.IMMD.lsinfs.petasc},
pages = {439-447},
peerreviewed = {Yes},
publisher = {Springer},
series = {Lecture Notes in Computer Science},
title = {{Peta}-{Scale} {Hierarchical} {Hybrid} {Multigrid} {Using} {Hybrid} {Parallelization}},
url = {http://link.springer.com/chapter/10.1007/978-3-662-43880-0{\_}50},
volume = {8353},
year = {2014}
}
@article{faucris.111744644,
abstract = {Fault tolerant massively parallel multigrid methods for elliptic partial differential equations are a step towards resilient solvers. Here, we combine domain partitioning with geometric multigrid methods to obtain fast and fault-robust solvers for three-dimensional problems. The recovery strategy is based on the redundant storage of ghost values, as they are commonly used in distributed memory parallel programs. In the case of a fault, the redundant interface values can be easily recovered, while the lost inner unknowns are recomputed approximately with recovery algorithms using multigrid cycles for solving a local Dirichlet problem. Different strategies are compared and evaluated with respect to performance, computational cost, and speedup. Especially effective are asynchronous strategies combining global solves with accelerated local recovery. By this, multiple faults can be fully compensated with respect to both the number of iterations and run-time. For illustration, we use a state-of-the-art petascale supercomputer to study failure scenarios when solving systems with up to 6 · $10^{11}$ (0.6 trillion) unknowns.},
author = {Huber, Markus and Gmeiner, Björn and Rüde, Ulrich and Wohlmuth, B. I.},
doi = {10.1137/15M1026122},
faupublication = {yes},
journal = {SIAM Journal on Scientific Computing},
keywords = {fault tolerant algorithms; massively parallel and asynchronous multigrid},
pages = {217-239},
peerreviewed = {Yes},
title = {{Resilience} for {Massively} {Parallel} {Multigrid} {Solvers}},
url = {http://epubs.siam.org/doi/pdf/10.1137/15M1026122},
volume = {38},
year = {2016}
}
@misc{faucris.107747464,
abstract = {The computational complexity of naive, sampling-based uncertainty quantification for 3D partial differential equations is extremely high. Multilevel approaches, such as multilevel Monte Carlo (MLMC), can reduce the complexity significantly, but to exploit them fully in a parallel environment, sophisticated scheduling strategies are needed. Often fast algorithms that are executed in parallel are essential to compute fine level samples in 3D, whereas to compute individual coarse level samples only moderate numbers of processors can be employed efficiently. We make use of multiple instances of a parallel multigrid solver combined with advanced load balancing techniques. In particular, we optimize the concurrent execution across the three layers of the MLMC method: parallelization across levels, across samples, and across the spatial grid. The overall efficiency and performance of these methods will be analyzed. Here the “scalability window” of the multigrid solver is revealed as being essential, i.e., the property that the solution can be computed with a range of process numbers while maintaining good parallel efficiency. We evaluate the new scheduling strategies in a series of numerical tests, and conclude the paper demonstrating large 3D scaling experiments.},
author = {Rüde, Ulrich and Drzisga, Daniel Peter and Gmeiner, Björn and Wohlmuth, B. I. and Scheichl, Robert},
faupublication = {yes},
peerreviewed = {automatic},
title = {{Scheduling} massively parallel multigrid for multilevel {Monte} {Carlo} methods},
url = {https://arxiv.org/pdf/1607.03252v1.pdf},
year = {2016}
}
@article{faucris.107428904,
abstract = {In this work, we extend Achi Brandt's notion of textbook multigrid efficiency (TME) to massively parallel algorithms. Using a finite element based geometric multigrid implementation, we recall the classical view on TME with experiments for scalar linear equations with constant and varying coefficients as well as linear systems with saddle-point structure. To extend the idea of TME to the parallel setting, we give a new characterization of a work unit (WU) in an architecture-aware fashion by taking into account performance modeling techniques. We illustrate our newly introduced parallel TME measure by large-scale computations, solving problems with up to 200 billion unknowns on a TOP-10 supercomputer.},
author = {Gmeiner, Björn and Rüde, Ulrich and Stengel, Holger and Waluga, Christian and Wohlmuth, B. I.},
doi = {10.4208/nmtma.2015.w10si},
faupublication = {yes},
journal = {Numerical Mathematics-Theory Methods and Applications},
note = {UnivIS-Import:2015-04-14:Pub.2015.tech.IMMD.lsinfs.toward},
pages = {22-46},
peerreviewed = {Yes},
title = {{Towards} {Textbook} {Efficiency} for {Parallel} {Multigrid}},
url = {http://journals.cambridge.org/abstract{\_}S100489791500001X},
volume = {8},
year = {2015}
}
@article{faucris.108835584,
author = {Gmeiner, Björn and Köstler, Harald and Rüde, Ulrich},
faupublication = {yes},
journal = {GAMM-Mitteilungen},
note = {UnivIS-Import:2015-03-09:Pub.2013.tech.IMMD.lsinfs.wievie},
pages = {18-23},
peerreviewed = {unknown},
title = {{Wie} viel {Unbekannte} hat das größte {Gleichungssystem}, das man heute lösen kann?},
url = {http://www.gamm-ev.de/pdf{\_}file/GAMM{\_}1{\_}2013{\_}web.pdf},
volume = {1},
year = {2013}
}