Publications

  1. Ben Maman, Meinard Müller, Johannes Zeitler, and Amit Bermano
    Performance Conditioning for Diffusion-Based Multi-Instrument Music Synthesis
    In Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), 2024.
    @inproceedings{faucris.318741779,
      author         = {Maman, Ben and Müller, Meinard and Zeitler, Johannes and Bermano, Amit},
      title          = {{Performance} {Conditioning} for {Diffusion}-{Based} {Multi}-{Instrument} {Music} {Synthesis}},
      booktitle      = {Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
      address        = {Seoul, Korea},
      year           = {2024},
      faupublication = {yes},
      peerreviewed   = {Yes},
    }
  2. Johannes Zeitler, Michael Krause, and Meinard Müller
    Soft Dynamic Time Warping with Variable Step Weights
    In Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP), 2024.
    @inproceedings{faucris.318554622,
      author         = {Zeitler, Johannes and Krause, Michael and Müller, Meinard},
      title          = {{Soft} {Dynamic} {Time} {Warping} with {Variable} {Step} {Weights}},
      booktitle      = {Proceedings of the IEEE International Conference on Acoustics, Speech, and Signal Processing (ICASSP)},
      address        = {Seoul, Korea},
      year           = {2024},
      faupublication = {yes},
      peerreviewed   = {Yes},
    }
  3. Johannes Zeitler, Simon Deniffel, Michael Krause, and Meinard Müller
    Stabilizing Training with Soft Dynamic Time Warping: A Case Study for Pitch Class Estimation with Weakly Aligned Targets
    In Proceedings of the 24th International Society for Music Information Retrieval Conference, 2023. DOI
    @inproceedings{faucris.307044861,
    abstract = {Soft dynamic time warping (SDTW) is a differentiable loss function that allows for training neural networks from weakly aligned data. Typically, SDTW is used to iteratively compute and refine soft alignments that compensate for temporal deviations between the training data and its weakly annotated targets. One major problem is that a mismatch between the estimated soft alignments and the reference alignments in the early training stage leads to incorrect parameter updates, making the overall training procedure unstable. In this paper, we investigate such stability issues by considering the task of pitch class estimation from music recordings as an illustrative case study. In particular, we introduce and discuss three conceptually different strategies (a hyperparameter scheduling, a diagonal prior, and a sequence unfolding strategy) with the objective of stabilizing intermediate soft alignment results. Finally, we report on experiments that demonstrate the effectiveness of the strategies and discuss efficiency and implementation issues.},
    author = {Johannes Zeitler and Simon Deniffel and Michael Krause and Meinard Müller},
    booktitle = {Proceedings of the 24th International Society for Music Information Retrieval Conference},
    internal-note = {Entry was truncated after the abstract; author, booktitle, title and year were restored from the accompanying publication listing. TODO: restore the DOI and any remaining exported fields from the original record.},
    title = {{Stabilizing} {Training} with {Soft} {Dynamic} {Time} {Warping}: {A} {Case} {Study} for {Pitch} {Class} {Estimation} with {Weakly} {Aligned} {Targets}},
    year = {2023}
    }
  4. Andreas Brendel, Johannes Zeitler, and Walter Kellermann
    Manifold learning-supported estimation of relative transfer functions for spatial filtering
    In Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP), 2022. DOI
    @inproceedings{faucris.268675201,
    abstract = {Many spatial filtering algorithms used for voice capture in, e.g.,
    teleconferencing applications, can benefit from or even rely on knowledge of
    Relative Transfer Functions (RTFs). Accordingly, many RTF estimators have been
    proposed which, however, suffer from performance degradation under acoustically
    adverse conditions or need prior knowledge on the properties of the interfering
    sources. While state-of-the-art RTF estimators ignore prior knowledge about the
    acoustic enclosure, audio signal processing algorithms for teleconferencing
    equipment are often operating in the same or at least a similar acoustic
    enclosure, e.g., a car or an office, such that training data can be collected.
    In this contribution, we use such data to train Variational Autoencoders (VAEs)
    in an unsupervised manner and apply the trained VAEs to enhance imprecise RTF
    estimates. Furthermore, a hybrid between classic RTF estimation and the trained
    VAE is investigated. Comprehensive experiments with real-world data confirm the
    efficacy for the proposed method.},
    author = {Andreas Brendel and Johannes Zeitler and Walter Kellermann},
    booktitle = {Proceedings of the IEEE International Conference on Acoustics, Speech and Signal Processing (ICASSP)},
    doi = {10.1109/icassp43922.2022.9746045},
    eventdate = {2022-05-07/2022-05-27},
    faupublication = {yes},
    peerreviewed = {Yes},
    title = {{Manifold} learning-supported estimation of relative transfer functions for spatial filtering},
    venue = {Singapore},
    year = {2022}
    }
  5. Sebastian Lotter, Maximilian Schäfer, Johannes Zeitler, and Robert Schober
    Saturating Receiver and Receptor Competition in Synaptic DMC: Deterministic and Statistical Signal Models
    IEEE Transactions on Nanobioscience: 1–1, 2021. DOI
    @article{faucris.261835966,
    abstract = {Synaptic communication is based on a biological Molecular Communication (MC) system which may serve as a blueprint for the design of synthetic MC systems. However, the physical modeling of synaptic MC is complicated by the possible saturation of the molecular receiver caused by the competition of neurotransmitters (NTs) for postsynaptic receptors. Receiver saturation renders the system behavior nonlinear in the number of released NTs and is commonly neglected in existing analytical models. Furthermore, due to the ligands’ competition for receptors (and vice versa), the individual binding events at the molecular receiver are in general not statistically independent and the commonly used binomial model for the statistics of the received signal does not apply. Hence, in this work, we propose a novel deterministic model for receptor saturation in terms of a state-space description based on an eigenfunction expansion of Fick’s diffusion equation. The presented solution is numerically stable and computationally efficient. Employing the proposed deterministic model, we show that saturation at the molecular receiver effectively reduces the peak-value of the expected received signal and accelerates the clearance of NTs as compared to the case when receptor occupancy is neglected. We further derive a statistical model for the received signal in terms of the hypergeometric distribution which accounts for the competition of NTs for receptors and the competition of receptors for NTs. The proposed statistical model reveals how the signal statistics are shaped by the number of released NTs, the number of receptors, and the binding kinetics of the receptors, respectively, in the presence of competition. In particular, we show that the impact of these parameters on the signal variance is qualitatively different depending on the relative numbers of NTs and receptors. Finally, the accuracy of the proposed deterministic and statistical models is verified by particle-based computer simulations.},
    author = {Sebastian Lotter and Maximilian Schäfer and Johannes Zeitler and Robert Schober},
    doi = {10.1109/TNB.2021.3092279},
    faupublication = {yes},
    journal = {IEEE Transactions on Nanobioscience},
    keywords = {Receivers; Mathematical model; Analytical models; Computational modeling; Numerical models; Monte Carlo methods; Degradation},
    pages = {1--1},
    peerreviewed = {Yes},
    title = {{Saturating} {Receiver} and {Receptor} {Competition} in {Synaptic} {DMC}: {Deterministic} and {Statistical} {Signal} {Models}},
    year = {2021}
    }
  6. Sebastian Lotter, Maximilian Schäfer, Johannes Zeitler, and Robert Schober
    Receptor Saturation Modeling for Synaptic DMC
    In ICC 2021 - IEEE International Conference on Communications, 2021. DOI
    @inproceedings{faucris.262786628,
    abstract = {Synaptic communication is a natural Molecular Communication (MC) system which may serve as a blueprint for the design of synthetic MC systems. In particular, it features highly specialized mechanisms to enable inter-symbol interference (ISI)-free and energy efficient communication. The understanding of synaptic MC is furthermore critical for disruptive innovations in the context of brain-machine interfaces. However, the physical modeling of synaptic MC is complicated by the possible saturation of the molecular receiver arising from the competition of postsynaptic receptors for neurotransmitters. Saturation renders the system behavior nonlinear and is commonly neglected in existing analytical models. In this work, we propose a novel model for receptor saturation in terms of a nonlinear, state-dependent boundary condition for Fick’s diffusion equation. We solve the resulting boundary-value problem using an eigenfunction expansion of the Laplace operator and the incorporation of the receiver memory as feedback system into the corresponding state-space description. The presented solution is numerically stable and computationally efficient. Furthermore, the proposed model is validated with particle-based stochastic computer simulations.},
    author = {Sebastian Lotter and Maximilian Schäfer and Johannes Zeitler and Robert Schober},
    booktitle = {ICC 2021 - IEEE International Conference on Communications},
    doi = {10.1109/ICC42927.2021.9500809},
    editor = {{IEEE}},
    eventdate = {2021-07-14/2021-07-23},
    faupublication = {yes},
    isbn = {9781728171227},
    keywords = {Degradation; Neurotransmitters; Computational modeling; Biological system modeling; Stochastic processes; Receivers; Brain modeling},
    peerreviewed = {unknown},
    title = {{Receptor} {Saturation} {Modeling} for {Synaptic} {DMC}},
    venue = {Montreal, QC},
    year = {2021}
    }
  7. Christof Weiß, Johannes Zeitler, Tim Zunner, Florian Schuberth, and Meinard Müller
    Learning Pitch-Class Representations from Score-Audio Pairs of Classical Music
    In Proceedings of the International Society for Music Information Retrieval Conference (ISMIR), 2021. DOI
    @inproceedings{faucris.266302262,
    abstract = {Chroma or pitch-class representations of audio recordings are an
    essential tool in music information retrieval. Traditional chroma
    features relying on signal processing are often influenced by timbral
    properties such as overtones or vibrato and, thus, only roughly
    correspond to the pitch classes indicated by a score. Deep learning
    provides a promising possibility to overcome such problems but requires
    large annotated datasets. Previous approaches therefore use either
    synthetic audio, MIDI-piano recordings, or chord annotations for
    training. Since these strategies have different limitations, we propose
    to learn transcription-like pitch-class representations using
    pre-synchronized score-audio pairs of classical music. We train several
    CNNs with musically inspired architectures and evaluate their
    pitch-class estimates for various instrumentations including orchestra,
    piano, chamber music, and singing. Moreover, we illustrate the learned
    features' behavior when used as input to a chord recognition system. In
    all our experiments, we compare cross-validation with cross-dataset
    evaluation. Obtaining promising results, our strategy shows how to
    leverage the power of deep learning for constructing robust but
    interpretable tonal representation},
    author = {Christof Weiß and Johannes Zeitler and Tim Zunner and Florian Schuberth and Meinard Müller},
    booktitle = {Proceedings of the International Society for Music Information Retrieval Conference (ISMIR)},
    doi = {10.5281/zenodo.5624549},
    eventdate = {2021-11-07/2021-11-12},
    faupublication = {yes},
    peerreviewed = {Yes},
    title = {{Learning} {Pitch}-{Class} {Representations} from {Score}-{Audio} {Pairs} of {Classical} {Music}},
    venue = {online},
    year = {2021}
    }