My research primarily focuses on computer vision and machine learning with a particular emphasis on multisensory machine intelligence involving sight, sound, and touch. The overarching goal of my research is to empower machines to emulate and enhance human capabilities in seeing, hearing, and feeling, ultimately enabling them to comprehensively perceive, understand, and interact with the multisensory world.
Prospective Students: I am always seeking self-motivated students to join my group. If you are interested, here is some more information.
@comment{Workshop paper entry. The acronym HRTF is brace-protected so that styles applying sentence casing to titles keep it uppercase.}
@inproceedings{zhang2025towards,
  author    = {Zhang, You and Francl, Andrew and Gao, Ruohan and Calamia, Paul and Duan, Zhiyao and Ananthabhotla, Ishwarya},
  title     = {Towards Perception-Informed Latent {HRTF} Representations},
  booktitle = {IEEE Workshop on Applications of Signal Processing to Audio and Acoustics (WASPAA)},
  year      = {2025},
}
@comment{Conference paper entry. The compound surname Amengual Gari is written in "Last, First" form so it is not split, and accented letters use LaTeX escapes so the entry also works with 8-bit classic BibTeX.}
@inproceedings{liu2025haae,
  author    = {Liu, Xiulong and Kumar, Anurag and Calamia, Paul and Amengual Gar{\'\i}, Sebasti{\`a} V. and Murdock, Calvin and Ananthabhotla, Ishwarya and Robinson, Philip and Shlizerman, Eli and Ithapu, Vamsi Krishna and Gao, Ruohan},
  title     = {Hearing Anywhere in Any Environment},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2025},
}
@comment{Conference paper entry. Asterisk markers were removed from the author surnames because they become part of the parsed name and break sorting and labels; the marker information is kept in an ignored field.}
@inproceedings{wang2024haa,
  author        = {Wang, Mason L. and Sawata, Ryosuke and Clarke, Samuel and Gao, Ruohan and Wu, Shangzhe and Wu, Jiajun},
  title         = {Hearing Anything Anywhere},
  booktitle     = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year          = {2024},
  internal-note = {Original listing marked Mason L. Wang and Ryosuke Sawata with an asterisk},
}
2023
The ObjectFolder Benchmark: Multisensory Object-Centric Learning with Neural and Real Objects
@comment{Conference paper entry. ObjectFolder is brace-protected against title recasing. Asterisk markers were removed from the author surnames and recorded in an ignored field instead.}
@inproceedings{gao2023ObjectFolderBM,
  author        = {Gao, Ruohan and Dou, Yiming and Li, Hao and Agarwal, Tanmay and Bohg, Jeannette and Li, Yunzhu and Fei-Fei, Li and Wu, Jiajun},
  title         = {The {ObjectFolder} Benchmark: Multisensory Object-Centric Learning with Neural and Real Objects},
  booktitle     = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year          = {2023},
  internal-note = {Original listing marked Ruohan Gao, Yiming Dou and Hao Li with an asterisk},
}
RealImpact: A Dataset of Impact Sound Fields for Real Objects
@comment{Conference paper entry. The author Mark Rau appeared twice in the original author list; the repeated occurrence was removed. RealImpact is brace-protected against title recasing.}
@inproceedings{clarke2023realimpact,
  author    = {Clarke, Samuel and Gao, Ruohan and Wang, Mason and Rau, Mark and Xu, Julia and Wang, Jui-Hsien and James, Doug and Wu, Jiajun},
  title     = {{RealImpact}: A Dataset of Impact Sound Fields for Real Objects},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2023},
}
2022
See, Hear, and Feel: Smart Sensory Fusion for Robotic Manipulation
@comment{Conference paper entry. Asterisk and dagger markers were removed from the author surnames because they become part of the parsed name and the dagger is non-ASCII; the marker information is kept in an ignored field.}
@inproceedings{li2022seehearfeel,
  author        = {Li, Hao and Zhang, Yizhi and Zhu, Junzhe and Wang, Shaoxiong and Lee, Michelle A. and Xu, Huazhe and Adelson, Edward and Fei-Fei, Li and Gao, Ruohan and Wu, Jiajun},
  title         = {See, Hear, and Feel: Smart Sensory Fusion for Robotic Manipulation},
  booktitle     = {Conference on Robot Learning (CoRL)},
  year          = {2022},
  internal-note = {Original listing marked Hao Li and Yizhi Zhang with an asterisk, and Ruohan Gao and Jiajun Wu with a dagger},
}
ObjectFolder 2.0: A Multisensory Object Dataset for Sim2Real Transfer
@comment{Conference paper entry. ObjectFolder and Sim2Real are brace-protected against title recasing. Asterisk markers were removed from the author surnames and recorded in an ignored field instead.}
@inproceedings{gao2022ObjectFolderV2,
  author        = {Gao, Ruohan and Si, Zilin and Chang, Yen-Yu and Clarke, Samuel and Bohg, Jeannette and Fei-Fei, Li and Yuan, Wenzhen and Wu, Jiajun},
  title         = {{ObjectFolder} 2.0: A Multisensory Object Dataset for {Sim2Real} Transfer},
  booktitle     = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year          = {2022},
  internal-note = {Original listing marked Ruohan Gao, Zilin Si and Yen-Yu Chang with an asterisk},
}
@comment{Conference paper entry for Visual Acoustic Matching, laid out one field per line.}
@inproceedings{chen2022visual,
  author    = {Chen, Changan and Gao, Ruohan and Calamia, Paul and Grauman, Kristen},
  title     = {Visual Acoustic Matching},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2022},
}
2021
Geometry-Aware Multi-Task Learning for Binaural Audio Generation from Video
@comment{Conference paper entry on binaural audio generation from video, laid out one field per line.}
@inproceedings{garg2021geometry,
  author    = {Garg, Rishabh and Gao, Ruohan and Grauman, Kristen},
  title     = {Geometry-Aware Multi-Task Learning for Binaural Audio Generation from Video},
  booktitle = {British Machine Vision Conference (BMVC)},
  year      = {2021},
}
Look and Listen: From Semantic to Spatial Audio-Visual Perception
Ruohan Gao
Ph.D. Dissertation, 2021
Michael H. Granof Award, UT Austin’s Top 1 Doctoral Dissertation [Bib] [PDF] [Media Coverage]
@comment{Doctoral dissertation entry. Uses the phdthesis type with a school field instead of an inproceedings entry whose booktitle held the words Ph.D. Dissertation; the type field preserves that label.}
@phdthesis{gao2021dissertation,
  author = {Gao, Ruohan},
  title  = {Look and Listen: From Semantic to Spatial Audio-Visual Perception},
  school = {The University of Texas at Austin},
  type   = {{Ph.D.} Dissertation},
  year   = {2021},
}
@comment{Conference paper entry. The token 2.5D is brace-protected so that styles applying sentence casing do not render it as 2.5d.}
@inproceedings{gao2019visual-sound,
  author    = {Gao, Ruohan and Grauman, Kristen},
  title     = {{2.5D} Visual Sound},
  booktitle = {Conference on Computer Vision and Pattern Recognition (CVPR)},
  year      = {2019},
}
2018
Learning to Separate Object Sounds by Watching Unlabeled Video
@comment{Conference paper entry on separating object sounds from unlabeled video, laid out one field per line.}
@inproceedings{gao2018object-sounds,
  author    = {Gao, Ruohan and Feris, Rogerio and Grauman, Kristen},
  title     = {Learning to Separate Object Sounds by Watching Unlabeled Video},
  booktitle = {European Conference on Computer Vision (ECCV)},
  year      = {2018},
}