% AutoVFX conference paper, published in the 3DV 2025 proceedings.
% The citation key is the exporter-generated identifier; it is kept unchanged
% so that existing citations of this entry continue to resolve.
% NOTE(review): address holds only a country, not a publisher city -- confirm.
@inproceedings{2ea1657a61784eb4b6adbd739908efa0,
  author    = {Hsu, Hao Yu and Lin, Chih Hao and Zhai, Albert J. and Xia, Hongchi and Wang, Shenlong},
  title     = {{AutoVFX}: Physically Realistic Video Editing from Natural Language Instructions},
  booktitle = {Proceedings - 2025 International Conference on 3D Vision, 3DV 2025},
  pages     = {769--780},
  publisher = {Institute of Electrical and Electronics Engineers Inc.},
  address   = {United States},
  year      = {2025},
  doi       = {10.1109/3DV66043.2025.00076},
  language  = {English (US)},
  keywords  = {llm agent, material editing, object insertion, physical simulation, scene simulation, text-guided video editing, visual effects},
  note      = {Publisher Copyright: {\textcopyright} 2025 IEEE.; 12th International Conference on 3D Vision, 3DV 2025 ; Conference date: 25-03-2025 Through 28-03-2025},
  abstract  = {Modern visual effects (VFX) software has made it possible for skilled artists to create imagery of virtually anything. However, the creation process remains laborious, complex, and largely inaccessible to everyday users. In this work, we present AutoVFX, a framework that automatically creates realistic and dynamic VFX videos from a single video and natural language instructions. By carefully integrating neural scene modeling, LLM-based code generation, and physical simulation, AutoVFX is able to provide physically-grounded, photorealistic editing effects that can be controlled directly using natural language instructions. We conduct extensive experiments to validate AutoVFX's efficacy across a diverse spectrum of videos and instructions. Quantitative and qualitative results suggest that AutoVFX outperforms all competing methods by a large margin in generative quality, instruction alignment, editing versatility, and physical plausibility.},
}