% Conference paper (ICASSP'19) presenting the Factorial Deep Markov Model (FDMM)
% for unsupervised representation learning of speech. Fields are kept in
% alphabetical order; the citation key is unchanged.
@inproceedings{sameer-et-al-icassp-19,
abstract = {We present the Factorial Deep Markov Model (FDMM) for representation learning of speech. The FDMM learns disentangled, interpretable and lower dimensional latent representations from speech without supervision. We use a static and dynamic latent variable to exploit the fact that information in a speech signal evolves at different time scales. Latent representations learned by the FDMM outperform a baseline ivector system on speaker verification and dialect identification while also reducing the error rate of a phone recognition system in a domain mismatch scenario.},
address = {Brighton, UK},
author = {Khurana, Sameer and Joty, Shafiq and Ali, Ahmed and Glass, James},
booktitle = {International Conference on Acoustics, Speech, and Signal Processing},
month = may,
pages = {6540--6544},
publisher = {IEEE},
series = {ICASSP'19},
title = {A Factorial Deep {Markov} Model For Unsupervised Disentangled Representation Learning From Speech},
url = {https://groups.csail.mit.edu/sls/publications/2019/SameerKhurana_ICASSP-2019.pdf},
year = {2019}
}