% Encoding: UTF-8
@COMMENT{BibTeX export based on data in FAU CRIS: https://cris.fau.de/}
@COMMENT{For any questions please write to cris-support@fau.de}
% Workshop paper (SCOPES 2018) on automatic kernel fusion for image processing DSLs.
% The workshop dates are stored in eventdate; year carries the publication year.
@inproceedings{faucris.106810044,
  author         = {Qiao, Bo and Reiche, Oliver and Hannig, Frank and Teich, Jürgen},
  title          = {Automatic Kernel Fusion for Image Processing {DSLs}},
  booktitle      = {Proceedings of the 21st International Workshop on Software and Compilers for Embedded Systems ({SCOPES})},
  eventdate      = {2018-05-28/2018-05-30},
  venue          = {Sankt Goar},
  pages          = {76--85},
  year           = {2018},
  doi            = {10.1145/3207719.3207723},
  isbn           = {978-1-4503-5780-7},
  keywords       = {Domain-specific Languages, Image Processing, Kernel Fusion},
  abstract       = {Programming image processing algorithms on hardware accelerators such as graphics processing units (GPUs) often exhibits a trade-off between software portability and performance portability. Domain-specific languages (DSLs) have proven to be a promising remedy, which enable optimizations and generation of efficient code from a concise, high-level algorithm representation. The scope of this paper is an optimization framework for image processing DSLs in the form of a source-to-source compiler. To cope with the inter-kernel communication bound via global memory for GPU applications, kernel fusion is investigated as a primary optimization technique to improve temporal locality. In order to enable automatic kernel fusion, we analyze the fusibility of each kernel in the algorithm, in terms of data dependencies, resource utilization, and parallelism granularity. By combining the obtained information with the domain-specific knowledge captured in the DSL, a method to automatically fuse the suitable kernels is proposed and integrated into an open source DSL framework. The novel kernel fusion technique is evaluated on two filter-based image processing applications, for which speedups of up to 1.60 are obtained for an NVIDIA GeForce 745 graphics card target.},
  faupublication = {yes},
  peerreviewed   = {Yes},
}