LAMM (pronounced /læm/; the name means "cute lamb," a nod to LLaMA) is a growing open-source community that helps researchers and developers quickly train and evaluate Multi-modal Large Language Models (MLLMs) and build multi-modal AI agents capable of bridging the gap between ideas and execution, enabling seamless interaction between humans and AI machines.
Citation
LAMM
@article{yin2023lamm,
title={LAMM: Language-Assisted Multi-Modal Instruction-Tuning Dataset, Framework, and Benchmark},
author={Yin, Zhenfei and Wang, Jiong and Cao, Jianjian and Shi, Zhelun and Liu, Dingning and Li, Mukai and Sheng, Lu and Bai, Lei and Huang, Xiaoshui and Wang, Zhiyong and others},
journal={arXiv preprint arXiv:2306.06687},
year={2023}
}
Assessment of Multimodal Large Language Models in Alignment with Human Values
@misc{shi2024assessment,
title={Assessment of Multimodal Large Language Models in Alignment with Human Values},
author={Zhelun Shi and Zhipin Wang and Hongxing Fan and Zaibin Zhang and Lijun Li and Yongting Zhang and Zhenfei Yin and Lu Sheng and Yu Qiao and Jing Shao},
year={2024},
eprint={2403.17830},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
ChEF
@misc{shi2023chef,
title={ChEF: A Comprehensive Evaluation Framework for Standardized Assessment of Multimodal Large Language Models},
author={Zhelun Shi and Zhipin Wang and Hongxing Fan and Zhenfei Yin and Lu Sheng and Yu Qiao and Jing Shao},
year={2023},
eprint={2311.02692},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
Octavius
@misc{chen2023octavius,
title={Octavius: Mitigating Task Interference in MLLMs via MoE},
author={Zeren Chen and Ziqin Wang and Zhen Wang and Huayang Liu and Zhenfei Yin and Si Liu and Lu Sheng and Wanli Ouyang and Yu Qiao and Jing Shao},
year={2023},
eprint={2311.02684},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
DepictQA
@article{depictqa,
title={Depicting Beyond Scores: Advancing Image Quality Assessment through Multi-modal Language Models},
author={You, Zhiyuan and Li, Zheyuan and Gu, Jinjin and Yin, Zhenfei and Xue, Tianfan and Dong, Chao},
journal={arXiv preprint arXiv:2312.08962},
year={2023}
}
MP5
@misc{qin2023mp5,
title={MP5: A Multi-modal Open-ended Embodied System in Minecraft via Active Perception},
author={Yiran Qin and Enshen Zhou and Qichang Liu and Zhenfei Yin and Lu Sheng and Ruimao Zhang and Yu Qiao and Jing Shao},
year={2023},
eprint={2312.07472},
archivePrefix={arXiv},
primaryClass={cs.CV}
}
Get Started
Please see the tutorial for the basic usage of this repo.
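To make the workflow concrete, here is a minimal, self-contained sketch of the kind of evaluation loop such a framework runs: scoring a model's answers against a benchmark's expected answers. Every name here (run_benchmark, the toy model, the toy data) is a hypothetical placeholder for illustration, not the repo's actual API; the tutorial remains the authoritative reference.

# Hypothetical sketch of a benchmark harness for a multi-modal model.
# None of these names come from the LAMM codebase; see the tutorial for real usage.
from typing import Callable, Iterable, Tuple

Sample = Tuple[str, str]  # (question about an image/point cloud, expected answer)

def run_benchmark(model: Callable[[str], str], samples: Iterable[Sample]) -> float:
    """Return exact-match accuracy of `model` over `samples`."""
    total = correct = 0
    for question, expected in samples:
        total += 1
        if model(question).strip().lower() == expected.strip().lower():
            correct += 1
    return correct / max(total, 1)

if __name__ == "__main__":
    # Toy stand-ins; a real run would wrap an MLLM checkpoint and a dataset loader.
    def toy_model(question: str) -> str:
        return "cat" if "animal" in question else "unknown"

    toy_data = [("What animal is in the image?", "cat"),
                ("What color is the car?", "red")]
    print(f"accuracy = {run_benchmark(toy_model, toy_data):.2f}")  # prints 0.50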
License
The project is released under the CC BY-NC 4.0 license (allowing non-commercial use only), and models trained using the dataset should not be used outside of research purposes.
About
[NeurIPS 2023 Datasets and Benchmarks Track] LAMM: Multi-Modal Large Language Models and Applications as AI Agents