% Conference paper (inproceedings entry): "End-To-End Source Separation with
% Adaptive Front-Ends", pages 684--688 of the 52nd Asilomar conference record.
% NOTE(review): month/day below give 2 Jul 2018, while the note field states the
% conference ran 28-10-2018 through 31-10-2018 -- confirm which date is intended.
@inproceedings{070745caf4124372b4cc647adfd2faec,
  author    = {Shrikant Venkataramani and Jonah Casebeer and Paris Smaragdis},
  title     = {End-To-End Source Separation with Adaptive Front-Ends},
  booktitle = {Conference Record of the 52nd Asilomar Conference on Signals, Systems and Computers, ACSSC 2018},
  editor    = {Matthews, {Michael B.}},
  series    = {Conference Record - Asilomar Conference on Signals, Systems and Computers},
  publisher = {IEEE Computer Society},
  pages     = {684--688},
  year      = {2018},
  month     = jul,
  day       = {2},
  doi       = {10.1109/ACSSC.2018.8645535},
  language  = {English (US)},
  keywords  = {Auto-encoders, adaptive transforms, deep learning, source separation},
  abstract  = {Source separation and other audio applications have traditionally relied on the use of short-time Fourier transforms as a front-end frequency domain representation step. The unavailability of a neural network equivalent to forward and inverse transforms hinders the implementation of end-to-end learning systems for these applications. We develop an auto-encoder neural network that can act as an equivalent to short-time front-end transforms. We demonstrate the ability of the network to learn optimal, real-valued basis functions directly from the raw waveform of a signal and further show how it can be used as an adaptive front-end for supervised source separation. In terms of separation performance, these transforms significantly outperform their Fourier counterparts. Finally, we also propose and interpret a novel source to distortion ratio based cost function for end-to-end source separation.},
  note      = {Publisher Copyright: {\textcopyright} 2018 IEEE.; 52nd Asilomar Conference on Signals, Systems and Computers, ACSSC 2018 ; Conference date: 28-10-2018 Through 31-10-2018},
}