Prospective students: I am actively looking for strong, self-motivated PhD/MS students, postdocs, and (remote) interns interested in building systems for machine learning to join my group. If you are interested in working with me, please read the FAQ and send me an email with your CV and transcripts.
NSDI
FlexLLM: Token-Level Co-Serving of LLM Inference and Fine-Tuning with SLO Guarantees
Gabriele Oliaro, Xupeng Miao, Xinhao Cheng, Vineeth Kada and 8 more authors
@article{miao26flexllm,
  title = {FlexLLM: Token-Level Co-Serving of LLM Inference and Fine-Tuning with SLO Guarantees},
  author = {Oliaro, Gabriele and Miao, Xupeng and Cheng, Xinhao and Kada, Vineeth and Gao, Ruohan and Huang, Yingyi and Delacourt, Remi and Yang, April and Wang, Yingcheng and Wu, Mengdi and Unger, Colin and Jia, Zhihao},
  journal = {Proceedings of NSDI Conference},
  year = {2026},
  cofirst = {true},
}
ASPLOS
SpotServe: Serving Generative Large Language Models on Preemptible Instances (Distinguished Artifact Award; IEEE Micro Top Picks Honorable Mention)
Xupeng Miao, Chunan Shi, Jiangfei Duan, Xiaoli Xi and 3 more authors
@article{miao24spotserve,
  title = {SpotServe: Serving Generative Large Language Models on Preemptible Instances},
  author = {Miao, Xupeng and Shi, Chunan and Duan, Jiangfei and Xi, Xiaoli and Lin, Dahua and Cui, Bin and Jia, Zhihao},
  journal = {Proceedings of ASPLOS Conference},
  year = {2024},
}
ASPLOS
SpecInfer: Accelerating Generative Large Language Model Serving with Speculative Inference and Token Tree Verification
Xupeng Miao, Gabriele Oliaro, Zhihao Zhang, Xinhao Cheng and 10 more authors
@article{miao23specinfer,
  title = {SpecInfer: Accelerating Generative Large Language Model Serving with Speculative Inference and Token Tree Verification},
  author = {Miao, Xupeng and Oliaro, Gabriele and Zhang, Zhihao and Cheng, Xinhao and Wang, Zeyu and Wong, Rae Ying Yee and Zhu, Alan and Yang, Lijie and Shi, Xiaoxiang and Shi, Chunan and Chen, Zhuoming and Arfeen, Daiyaan and Abhyankar, Reyna and Jia, Zhihao},
  journal = {Proceedings of ASPLOS Conference},
  year = {2024},
  doi = {10.48550/arXiv.2305.09781},
}
NSDI
Parcae: Proactive, Liveput-Optimized DNN Training on Preemptible Instances
Jiangfei Duan, Ziang Song, Xupeng Miao, Xiaoli Xi and 4 more authors
@article{nsdi24parcae,
  title = {Parcae: Proactive, Liveput-Optimized DNN Training on Preemptible Instances},
  author = {Duan, Jiangfei and Song, Ziang and Miao, Xupeng and Xi, Xiaoli and Lin, Dahua and Xu, Harry and Zhang, Minjia and Jia, Zhihao},
  journal = {Proceedings of NSDI Conference},
  cofirst = {true},
  year = {2024},
}
VLDB
SDPipe: A Semi-Decentralized Framework for Heterogeneity-aware Pipeline-parallel Training
Xupeng Miao, Yining Shi, Zhi Yang, Bin Cui and 1 more author
@article{miao2023sdpipe,
  title = {SDPipe: A Semi-Decentralized Framework for Heterogeneity-aware Pipeline-parallel Training},
  author = {Miao, Xupeng and Shi, Yining and Yang, Zhi and Cui, Bin and Jia, Zhihao},
  journal = {Proc. {VLDB} Endow.},
  volume = {16},
  year = {2023},
  publisher = {VLDB Endowment},
}
VLDB
Galvatron: Efficient Transformer Training over Multiple GPUs Using Automatic Parallelism
Xupeng Miao, Yujie Wang, Youhe Jiang, Chunan Shi and 3 more authors
@article{miao2023galvatron,
  title = {Galvatron: Efficient Transformer Training over Multiple GPUs Using Automatic Parallelism},
  author = {Miao, Xupeng and Wang, Yujie and Jiang, Youhe and Shi, Chunan and Nie, Xiaonan and Zhang, Hailin and Cui, Bin},
  journal = {Proc. {VLDB} Endow.},
  volume = {16},
  number = {3},
  pages = {470--479},
  year = {2023},
  doi = {10.14778/3570690.3570697},
  publisher = {VLDB Endowment},
}
VLDB
HET: Scaling out Huge Embedding Model Training via Cache-enabled Distributed Framework (Best Scalable Data Science Paper Award)
Xupeng Miao, Hailin Zhang, Yining Shi, Xiaonan Nie and 3 more authors
@article{miao2021het,
  title = {{HET:} Scaling out Huge Embedding Model Training via Cache-enabled Distributed Framework},
  author = {Miao, Xupeng and Zhang, Hailin and Shi, Yining and Nie, Xiaonan and Yang, Zhi and Tao, Yangyu and Cui, Bin},
  journal = {Proc. {VLDB} Endow.},
  volume = {15},
  number = {2},
  pages = {312--320},
  year = {2022},
  publisher = {VLDB Endowment},
}
SIGMOD
HET-GMP: A Graph-based System Approach to Scaling Large Embedding Model Training
Xupeng Miao, Yining Shi, Hailin Zhang, Xin Zhang and 3 more authors
@inproceedings{miao2022hetgmp,
  author = {Miao, Xupeng and Shi, Yining and Zhang, Hailin and Zhang, Xin and Nie, Xiaonan and Yang, Zhi and Cui, Bin},
  title = {{HET-GMP:} {A} Graph-based System Approach to Scaling Large Embedding Model Training},
  booktitle = {Proceedings of SIGMOD Conference},
  pages = {470--480},
  publisher = {{ACM}},
  year = {2022},
  doi = {10.1145/3514221.3517902},
}
SIGMOD
Heterogeneity-Aware Distributed Machine Learning Training via Partial Reduce
Xupeng Miao, Xiaonan Nie, Yingxia Shao, Zhi Yang and 3 more authors
@inproceedings{DBLP:conf/sigmod/MiaoNSYJM021,
  author = {Miao, Xupeng and Nie, Xiaonan and Shao, Yingxia and Yang, Zhi and Jiang, Jiawei and Ma, Lingxiao and Cui, Bin},
  title = {Heterogeneity-Aware Distributed Machine Learning Training via Partial Reduce},
  booktitle = {Proceedings of SIGMOD Conference},
  pages = {2262--2270},
  publisher = {{ACM}},
  year = {2021},
  doi = {10.1145/3448016.3452773},
}