Prospective students: I am actively looking for strong, self-motivated PhD/MS students, postdocs, and (remote) interns interested in building systems for machine learning to join my group. If you are interested in working with me, please read the FAQ and send me an email with your CV and transcripts.
NSDI
FlexLLM: Token-Level Co-Serving of LLM Inference and Fine-Tuning with SLO Guarantees
Gabriele Oliaro, Xupeng Miao, Xinhao Cheng, Vineeth Kada and 8 more authors
@article{miao26flexllm,
  title = {FlexLLM: Token-Level Co-Serving of LLM Inference and Fine-Tuning with SLO Guarantees},
  author = {Oliaro, Gabriele and Miao, Xupeng and Cheng, Xinhao and Kada, Vineeth and Gao, Ruohan and Huang, Yingyi and Delacourt, Remi and Yang, April and Wang, Yingcheng and Wu, Mengdi and Unger, Colin and Jia, Zhihao},
  journal = {Proceedings of NSDI Conference},
  year = {2026},
  cofirst = {true},
}
ASPLOS
SpotServe: Serving Generative Large Language Models on Preemptible Instances (Distinguished Artifact Award; IEEE Micro Top Picks Honorable Mention)
Xupeng Miao, Chunan Shi, Jiangfei Duan, Xiaoli Xi and 3 more authors
@article{miao24spotserve,
  title = {SpotServe: Serving Generative Large Language Models on Preemptible Instances},
  author = {Miao, Xupeng and Shi, Chunan and Duan, Jiangfei and Xi, Xiaoli and Lin, Dahua and Cui, Bin and Jia, Zhihao},
  journal = {Proceedings of ASPLOS Conference},
  year = {2024},
}
ASPLOS
SpecInfer: Accelerating Generative Large Language Model Serving with Speculative Inference and Token Tree Verification
Xupeng Miao, Gabriele Oliaro, Zhihao Zhang, Xinhao Cheng and 10 more authors
@article{miao23specinfer,
  title = {SpecInfer: Accelerating Generative Large Language Model Serving with Speculative Inference and Token Tree Verification},
  author = {Miao, Xupeng and Oliaro, Gabriele and Zhang, Zhihao and Cheng, Xinhao and Wang, Zeyu and Wong, Rae Ying Yee and Zhu, Alan and Yang, Lijie and Shi, Xiaoxiang and Shi, Chunan and Chen, Zhuoming and Arfeen, Daiyaan and Abhyankar, Reyna and Jia, Zhihao},
  journal = {Proceedings of ASPLOS Conference},
  year = {2024},
  doi = {10.48550/arXiv.2305.09781},
}
NSDI
Parcae: Proactive, Liveput-Optimized DNN Training on Preemptible Instances
Jiangfei Duan, Ziang Song, Xupeng Miao, Xiaoli Xi and 4 more authors
@article{nsdi24parcae,
  title = {Parcae: Proactive, Liveput-Optimized DNN Training on Preemptible Instances},
  author = {Duan, Jiangfei and Song, Ziang and Miao, Xupeng and Xi, Xiaoli and Lin, Dahua and Xu, Harry and Zhang, Minjia and Jia, Zhihao},
  journal = {Proceedings of NSDI Conference},
  cofirst = {true},
  year = {2024},
}
VLDB
SDPipe: A Semi-Decentralized Framework for Heterogeneity-aware Pipeline-parallel Training
Xupeng Miao, Yining Shi, Zhi Yang, Bin Cui and 1 more author
@article{miao2023sdpipe,
  title = {SDPipe: A Semi-Decentralized Framework for Heterogeneity-aware Pipeline-parallel Training},
  author = {Miao, Xupeng and Shi, Yining and Yang, Zhi and Cui, Bin and Jia, Zhihao},
  journal = {Proc. {VLDB} Endow.},
  volume = {16},
  year = {2023},
  publisher = {VLDB Endowment},
}
VLDB
Galvatron: Efficient Transformer Training over Multiple GPUs Using Automatic Parallelism
Xupeng Miao, Yujie Wang, Youhe Jiang, Chunan Shi and 3 more authors
@article{miao2023galvatron,
  title = {Galvatron: Efficient Transformer Training over Multiple GPUs Using Automatic Parallelism},
  author = {Miao, Xupeng and Wang, Yujie and Jiang, Youhe and Shi, Chunan and Nie, Xiaonan and Zhang, Hailin and Cui, Bin},
  journal = {Proc. {VLDB} Endow.},
  volume = {16},
  number = {3},
  pages = {470--479},
  year = {2023},
  doi = {10.14778/3570690.3570697},
  publisher = {VLDB Endowment},
}
VLDB
HET: Scaling out Huge Embedding Model Training via Cache-enabled Distributed Framework (Best Scalable Data Science Paper Award)
Xupeng Miao, Hailin Zhang, Yining Shi, Xiaonan Nie and 3 more authors
@article{miao2021het,
  title = {{HET:} Scaling out Huge Embedding Model Training via Cache-enabled Distributed Framework},
  author = {Miao, Xupeng and Zhang, Hailin and Shi, Yining and Nie, Xiaonan and Yang, Zhi and Tao, Yangyu and Cui, Bin},
  journal = {Proc. {VLDB} Endow.},
  volume = {15},
  number = {2},
  pages = {312--320},
  year = {2022},
  publisher = {VLDB Endowment},
}
SIGMOD
HET-GMP: A Graph-based System Approach to Scaling Large Embedding Model Training
Xupeng Miao, Yining Shi, Hailin Zhang, Xin Zhang and 3 more authors
@inproceedings{miao2022hetgmp,
  author = {Miao, Xupeng and Shi, Yining and Zhang, Hailin and Zhang, Xin and Nie, Xiaonan and Yang, Zhi and Cui, Bin},
  title = {{HET-GMP:} {A} Graph-based System Approach to Scaling Large Embedding Model Training},
  booktitle = {Proceedings of SIGMOD Conference},
  pages = {470--480},
  publisher = {{ACM}},
  year = {2022},
  doi = {10.1145/3514221.3517902},
}
SIGMOD
Heterogeneity-Aware Distributed Machine Learning Training via Partial Reduce
Xupeng Miao, Xiaonan Nie, Yingxia Shao, Zhi Yang and 3 more authors
@inproceedings{DBLP:conf/sigmod/MiaoNSYJM021,
  author = {Miao, Xupeng and Nie, Xiaonan and Shao, Yingxia and Yang, Zhi and Jiang, Jiawei and Ma, Lingxiao and Cui, Bin},
  title = {Heterogeneity-Aware Distributed Machine Learning Training via Partial Reduce},
  booktitle = {Proceedings of SIGMOD Conference},
  pages = {2262--2270},
  publisher = {{ACM}},
  year = {2021},
  doi = {10.1145/3448016.3452773},
}