A resampling scheme is proposed for use with
Sequential Monte Carlo (SMC)-based Probability Hypothesis Density(PHD)
filters. It consists of two steps, first, regions of interest are
identified, then an evolutionary resampling is applied for each region.
Applying resampling locally corresponds to treating each target
individually, while the evolutionary resampling introduces a memory to a
group of particles, increasing the robustness of the estimation against
noise outliers. The proposed approach is compared to the original
SMC-PHD filter for tracking multiple targets in a deterministically
moving targets scenario, and a noisy motion scenario. In both cases, the
proposed approach provides more accurate estimates.

},
author = {Halimeh, MHD Modar and Brendel, Andreas and Kellermann, Walter},
booktitle = {European Signal Processing Conference (EUSIPCO)},
date = {2018-09-03/2018-09-07},
faupublication = {yes},
isbn = {978-90-827970-1-5},
keywords = {Multi-target tracking; nonlinear systems; Particle filters},
pages = {647-651},
peerreviewed = {Yes},
title = {{Evolutionary} resampling for multi-target tracking using probability hypothesis density filter},
venue = {Rome},
year = {2018}
}
% ICASSP 2018 paper: learning-based source-microphone distance estimation from coherent-to-diffuse power ratio estimates.
@inproceedings{faucris.201250445,
abstract = {We propose a method for estimating the distance between a sound source and a pair of recording microphones. The developed algorithm operates in the short-time Fourier transform domain and is based on estimates of the coherent-to-diffuse power ratio, which provides a measure for the amount of reverberation in each time-frequency bin. For a direct use of these estimates, precise knowledge on the room characteristics is necessary, which is in practice usually not available and hard to obtain. Therefore, we use a learning-based method, which adapts to the characteristics of the room in a training phase and estimates the source-microphone distance in a testing phase. The experiments comprise various setups with simulated and real data. It is shown that the proposed method generalizes well for different microphone positions and works robustly for different source signals, directions of arrival, reverberation times, and signal observation intervals. This leads to a high estimation accuracy at a low computational complexity with a small amount of training data.},
author = {Brendel, Andreas and Kellermann, Walter},
booktitle = {IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
date = {2018-04-15/2018-04-20},
faupublication = {yes},
isbn = {978-1-5386-4658-8},
keywords = {Acoustic range estimation; Learning; Coherent-to-Diffuse Power Ratio},
pages = {61--65},
peerreviewed = {Yes},
title = {{Learning}-based acoustic source-microphone distance estimation using the coherent-to-diffuse power ratio},
venue = {Calgary},
year = {2018}
}
% EUSIPCO 2018 paper: time-frequency-bin-wise beamformer switching for two-microphone speech enhancement.
% NOTE(review): the exported abstract was cut off mid-word; the final word was completed - verify against the published abstract.
@inproceedings{faucris.201282638,
abstract = {In this paper, we present a speech enhancement method using two microphones for underdetermined situations. A conventional speech enhancement method for underdetermined situations is time-frequency masking, where speech is enhanced by multiplying zero or one to each time-frequency component appropriately. Extending this method, we switch multiple preconstructed beamformers at each time-frequency bin, each of which suppresses a particular interferer. This method can suppress an interferer even when both the target and an interferer are simultaneously active at a given time-frequency bin. As a switching criterion, selection of minimum value of the outputs of all the beamformers at each time-frequency bin is investigated. Additionally, another method using direction of arrival estimation is also investigated. In experiments, we confirmed that the proposed methods were superior to conventional time-frequency masking and fixed beamforming in the performance of speech enhancement.},
author = {Yamaoka, Kouei and Brendel, Andreas and Ono, Nobutaka and Makino, Shoji and Bürger, Michael and Yamada, Takeshi and Kellermann, Walter},
booktitle = {European Signal Processing Conference (EUSIPCO)},
date = {2018-09-03/2018-09-07},
faupublication = {yes},
isbn = {978-90-827970-1-5},
pages = {1596--1600},
peerreviewed = {Yes},
title = {{Time}-frequency-bin-wise beamformer selection and masking for speech enhancement in underdetermined noisy scenarios},
venue = {Rome},
year = {2018}
}
% EUSIPCO 2018 paper: closed-form maximum likelihood estimator for blind reverberation time estimation.
@inproceedings{faucris.200964036,
abstract = {A new maximum likelihood (ML) estimator for the blind estimation of the reverberation time (RT) is derived. In contrast to previously proposed ML-based reverberation time estimators, the RT estimate is obtained by a simple closed-form expression, which leads to significant computational savings. Moreover, it is shown that the new estimator is unbiased and reaches the Cramer-Rao lower bound.

The proposed RT estimator achieves a similar estimation accuracy but involves a significantly lower computational complexity compared to an ML-based RT estimator that scored among the best at the ACE Challenge.},
author = {Löllmann, Heinrich and Brendel, Andreas and Kellermann, Walter},
booktitle = {European Signal Processing Conference (EUSIPCO)},
date = {2018-09-03/2018-09-07},
faupublication = {yes},
isbn = {978-90-827970-1-5},
keywords = {Reverberation time, blind estimation, ML estimation},
pages = {1--5},
peerreviewed = {unknown},
title = {Efficient {ML}-Estimator for Blind Reverberation Time Estimation},
venue = {Rome},
year = {2018}
}

% SAM 2018 paper: tracking a varying number of acoustic sources with an extended target Gaussian mixture PHD filter.
% NOTE(review): the exported abstract was cut off mid-word; the final word was completed - verify against the published abstract.
@inproceedings{faucris.201278650,
abstract = {We propose an approach for tracking a varying number of simultaneously active acoustic wideband signal sources in an acoustic enclosure. Relying on the assumption of W-disjoint orthogonality, the method uses narrowband position estimates of the sources for the targets. The instantaneous position estimates form clusters rather than single points, as would be required for a conventional Probability Hypothesis Density (PHD) filter. Therefore, we model the position estimates as extended targets and use a special form of the PHD filter, the extended target Gaussian mixture PHD filter, for tracking the targets. This allows to model target birth and death, which correspond to speech onset and end of utterance, respectively. With this model and by using the well-developed theory of Finite Set Statistics (FISST)-based multi-target tracking, we provide a comprehensive, strictly Bayesian treatment of the problem of tracking wideband acoustic sources using narrowband position estimates. We validate the results by tracking a varying number of targets in an enclosure simulated with the image-source method.},
author = {Brendel, Andreas and Kellermann, Walter},
booktitle = {IEEE Sensor Array and Multichannel Signal Processing Workshop (SAM)},
date = {2018-07-08/2018-07-11},
faupublication = {yes},
isbn = {978-1-5386-4752-3},
pages = {400--404},
peerreviewed = {Yes},
title = {{Tracking} of multiple sources in an acoustic sensor network using an extended {Gaussian} mixture {PHD} filter},
venue = {Sheffield},
year = {2018}
}

% IWAENC 2018 paper: distributed training of a Gaussian process range estimator based on the coherent-to-diffuse power ratio.
% NOTE(review): the export prefixed this abstract with a paragraph on EM-based speaker localization that does not match the title; it was removed - confirm against the publication record.
@inproceedings{faucris.211436290,
abstract = {A range estimation method relying on distributed training in an Acoustic Sensor Network (ASN) is proposed. The relation between the estimated Coherent-to-Diffuse Power Ratio (CDR) which is used as feature and the range of an acoustic source is learned by Gaussian Process (GP) regression. To this end, multiple sensor nodes, each equipped with two microphones, are distributed over the area of interest delivering multiple observations of the feature and extending the amount of training data significantly compared to the single node case. However, the computational power of the sensor nodes in an ASN is usually limited and a transmission of the data to a fusion center is infeasible due to constraints of the transmit power and because such a system would fail if transmission drop outs occur or the fusion center breaks. Hence, we aim at a completely distributed algorithm which is as exact as a corresponding centralized version, computationally simple and update-based, i.e., all sensor nodes have to fulfill exactly the same role in the algorithm. The efficacy of the proposed method is shown by a simulation study.},
author = {Brendel, Andreas and Kellermann, Walter},
booktitle = {International Workshop on Acoustic Signal Enhancement (IWAENC)},
date = {2018-09-17/2018-09-20},
faupublication = {yes},
isbn = {978-1-5386-8151-0},
keywords = {Acoustic range estimation; Acoustic sensor networks; Online Gaussian process regression},
pages = {246--250},
peerreviewed = {Yes},
title = {{Distance} estimation of acoustic sources using the coherent-to-diffuse power ratio based on distributed training},
venue = {Tokyo},
year = {2018}
}
% IEEE JSTSP 2019 journal article: distributed source localization in acoustic sensor networks.
% Title is stored in title case without per-word braces so the bibliography style controls the casing.
@article{faucris.211436983,
author = {Brendel, Andreas and Kellermann, Walter},
doi = {10.1109/JSTSP.2019.2900911},
faupublication = {yes},
journal = {IEEE Journal of Selected Topics in Signal Processing},
peerreviewed = {Yes},
title = {Distributed Source Localization in Acoustic Sensor Networks using the Coherent-to-Diffuse Power Ratio},
year = {2019}
}
% CAMSAP 2017 paper: localization of multiple simultaneously active sources using the averaged directivity pattern approach.
% NOTE(review): the exported abstract was cut off mid-word; the final word was completed - verify against the published abstract.
@inproceedings{faucris.119637144,
abstract = {In this contribution, we propose a new localization approach for multiple simultaneously active sound sources using acoustic sensor networks (ASNs). It is based on the averaged directivity pattern (ADP) approach, which explicitly models the influence of reverberation on its direction of arrival (DOA) estimates and has shown to be robust against additive noise and reverberation. We develop a framework capable of handling prior information, the integration of several DOA estimators and reliability information. In particular, we derive range information from the estimated ADP-energy map and use a contribution removal technique to tackle the problem of ghost sources. Experiments with simulated room impulse responses show the efficacy of this approach.},
author = {Brendel, Andreas and Kellermann, Walter},
booktitle = {IEEE 7th International Workshop on Computational Advances in Multi-Sensor Adaptive Processing (CAMSAP)},
date = {2017-12-10/2017-12-13},
doi = {10.1109/CAMSAP.2017.8313167},
faupublication = {yes},
isbn = {978-1-5386-1251-4},
keywords = {Acoustic Sensor Networks; Localization; Averaged Directivity Pattern},
pages = {1--5},
peerreviewed = {Yes},
title = {{Localization} of multiple simultaneously active sources in acoustic sensor networks using {ADP}},
venue = {Curaçao},
year = {2017}
}

% DAGA 2014 paper: comparison of coherence-based dereverberation methods for automatic speech recognition.
% NOTE(review): the export prefixed this abstract with a hyphen-broken abstract about TRINICON blind source separation that does not match the title; it was removed - confirm against the publication record.
@inproceedings{faucris.203410840,
abstract = {The idea of performing dereverberation using a short-time spatial coherence estimate dates back to 1977 [1], when it was proposed to essentially use the magnitude of the coherence as gain for reverberation suppression. Another heuristic method was recently proposed in [2], where a soft threshold function is used to compute a gain from the coherence magnitude, and the parameters of the threshold function are adapted depending on the histogram of the coherence magnitude in each frequency bin. Short-time coherence estimates have also been investigated in the context of beamforming as a so-called postfilter, and solutions for suppression of uncorrelated and diffuse noise have been proposed [3]. In this contribution, we focus on methods where, first, the ratio between direct and reverberation signal components (coherent-to-diffuse ratio, CDR) is estimated from a short-time coherence estimate, and filter weights for reverberation suppression are computed from the CDR using, e.g., the Wiener filter or spectral subtraction rule. We compare and illustrate the behavior of a number of different CDR estimators that have been proposed over the past years, and propose a new variant. Finally, we compare the practical effect of the methods by processing reverberated speech and evaluating the recognition accuracy achieved by an automatic speech recognizer with the processed signals.},
address = {Oldenburg, Germany},
author = {Schwarz, Andreas and Brendel, Andreas and Kellermann, Walter},
booktitle = {Deutsche Jahrestagung für Akustik (DAGA)},
date = {2014-03-10/2014-03-13},
faupublication = {yes},
pages = {525--526},
peerreviewed = {Yes},
title = {{Coherence}-based dereverberation for automatic speech recognition},
venue = {Oldenburg},
year = {2014}
}

% EUSIPCO 2018 paper: distributed learning-based source localization using Gaussian process regression on CDR estimates.
% NOTE(review): the exported abstract was cut off mid-word; the final word was completed as a plural - verify against the published abstract.
@inproceedings{faucris.201281925,
abstract = {A distributed learning-based algorithm for the localization of acoustic sources in an acoustic sensor network is proposed. It is based on estimates of the Coherent-to-Diffuse Power Ratio (CDR), which serve as feature for the source-microphone distance, i.e., the range. The relation between the estimated CDR and the range is learned by using Gaussian processes for non-parametric regression. The range estimates obtained from evaluating the regression function are fused by a weighted least squares estimation, which is implemented recursively, allowing for a distributed version of the algorithm. The resulting method is computationally efficient, works in highly reverberant and noisy scenarios and needs only a small amount of data shared over the network. The training phase of the algorithm requires only a few labeled observations. We show the efficacy of the approach with data obtained from image-source simulations.},
author = {Brendel, Andreas and Kellermann, Walter},
booktitle = {European Signal Processing Conference (EUSIPCO)},
date = {2018-09-03/2018-09-07},
faupublication = {yes},
isbn = {978-90-827970-1-5},
keywords = {Coherent-to-Diffuse Power Ratio; Gaussian Process Regression; Weighted Least Squares; Distributed Algorithm; Acoustic Sensor Network; Localization},
pages = {1586--1590},
peerreviewed = {Yes},
title = {{Learning}-based acoustic source localization in acoustic sensor networks using the coherent-to-diffuse power ratio},
venue = {Rome},
year = {2018}
}

% EURONOISE 2018 paper: selection of reliable STFT bins for speech source localization algorithms.
@inproceedings{faucris.201250061,
abstract = {Many algorithms for localizing, tracking or Direction of Arrival (DOA) estimation of speech sources rely on the so-called W-disjoint orthogonality, i.e., only one speaker is assumed to be active at a certain time-frequency bin. Based on this assumption, bin-wise DOA estimates can be computed from pairwise phase differences of each time-frequency bin and clustered afterwards. Averaging the estimates of each cluster, i.e., computing the cluster centroids, increases the robustness of the localization estimate. However, clustering can be computationally demanding due to the large amount of DOA estimates, and at the same time highly sensitive to errors as potentially many of them may not be reliable due to noise and reverberation. Therefore, an efficient selection algorithm for reliable Short-Time Fourier Transform (STFT) bins is desirable that aims at increasing the accuracy of the estimate while simultaneously reducing the computational complexity. In this contribution, we investigate different selection methods for STFT bins as suitable for localization algorithms for speech sources, which are based on the W-disjoint orthogonality, and exploit bin-wise speech signal power, Coherent-to-Diffuse Power Ratio (CDR), and Speech Presence Probability (SPP). The effectiveness of the selection processes is studied for different localization algorithms.},
author = {Brendel, Andreas and Huang, Chengyu and Kellermann, Walter},
booktitle = {EURONOISE 2018},
date = {2018-05-27/2018-05-31},
faupublication = {yes},
pages = {2561--2568},
peerreviewed = {unknown},
title = {{STFT} bin selection for localization algorithms based on the sparsity of speech signal spectra},
venue = {Heraklion, Crete},
year = {2018}
}