diff --git a/gradio_app.py b/gradio_app.py index e03ecbb..b38b11d 100644 --- a/gradio_app.py +++ b/gradio_app.py @@ -223,40 +223,85 @@ height="{height}" width="100%" frameborder="0">' # --------------------------------------------------------------------------- # VRAM management helpers (used when --low_vram_mode is set) # -# Strategy (RTX 3080, 16GB RAM / 20GB VRAM): -# Both models are fully deleted between uses — neither is kept in CPU RAM. -# This prevents the 16GB RAM from being exhausted when loading one model -# while the other is still resident in CPU memory. +# Adaptive strategy based on available system RAM: # -# i23d (shape model, ~7.25GB VRAM): -# → fully del'd after each shape generation -# → reloaded from HF-cached weights on next shape request (~20-30s) +# When switching from shape → texture (or vice versa): +# 1. Check available RAM via /proc/meminfo +# 2. If enough RAM to hold a model in CPU while loading the other (~16GB): +# → .to('cpu') the outgoing model (fast, no disk reload needed later) +# 3. If RAM is tight: +# → fully del the outgoing model, reload from disk later (~20-30s) # -# tex_pipeline (texture model, ~6.59GB VRAM): -# → fully del'd after each texture generation -# → reloaded from HF-cached weights on next texture request (~20s) # -# gc.collect() + empty_cache() before each load ensures previous tensors -# are freed before the new checkpoint is staged in CPU RAM. +# This allows machines with ≥32GB RAM to swap models instantly, +# while 16GB machines safely fall back to disk reload. # --------------------------------------------------------------------------- -def _unload_i23d_worker(): - """Delete shape model entirely, freeing VRAM and CPU RAM.""" - global i23d_worker - if i23d_worker is not None: - logger.info("Unloading shape model from memory...") +# Approximate RAM required (GB) to hold one model in CPU while loading another. +# Model weights: ~7GB each. Loading from disk stages ~7GB temporarily. 
+# Total: 7 (existing in CPU) + 7 (loading new) + 2 (OS headroom) = 16GB. +_RAM_THRESHOLD_GB = 16.0 + +# Track whether i23d is offloaded to CPU RAM (vs deleted entirely). +_i23d_on_cpu = False + + +def _get_available_ram_gb(): + """Return available system RAM in GB from /proc/meminfo.""" + try: + with open('/proc/meminfo') as f: + for line in f: + if line.startswith('MemAvailable:'): + return int(line.split()[1]) / (1024 * 1024) + except Exception: + pass + return 0.0 + + +def _can_offload_to_cpu(): + """Check if there's enough RAM to keep a model in CPU while loading another.""" + available = _get_available_ram_gb() + can = available >= _RAM_THRESHOLD_GB + logger.info( + f"RAM check: {available:.1f}GB available, " + f"need {_RAM_THRESHOLD_GB:.0f}GB for CPU offload → " + f"{'CPU offload (fast)' if can else 'full delete (safe)'}" + ) + return can + + +def _prepare_for_tex(): + """Free VRAM from shape model before loading texture pipeline.""" + global i23d_worker, _i23d_on_cpu + if i23d_worker is None: + _ensure_tex_pipeline() + return + + if _can_offload_to_cpu(): + logger.info("Offloading shape model to CPU RAM (fast path)...") + i23d_worker.to('cpu') + _i23d_on_cpu = True + torch.cuda.empty_cache() + else: + logger.info("Deleting shape model entirely (safe path, limited RAM)...") del i23d_worker i23d_worker = None + _i23d_on_cpu = False gc.collect() gc.collect() torch.cuda.empty_cache() + _ensure_tex_pipeline() + def _ensure_i23d_worker(): - """Load shape model to GPU if not already loaded.""" - global i23d_worker - if i23d_worker is None: - logger.info("Reloading shape model to GPU...") + """Load shape model to GPU — from CPU RAM (fast) or disk (slow).""" + global i23d_worker, _i23d_on_cpu + if i23d_worker is not None and _i23d_on_cpu: + logger.info("Restoring shape model from CPU to GPU (fast path)...") + i23d_worker.to(args.device) + _i23d_on_cpu = False + elif i23d_worker is None: + logger.info("Reloading shape model from disk to GPU (slow path)...") 
gc.collect() torch.cuda.empty_cache() from hy3dshape import Hunyuan3DDiTFlowMatchingPipeline @@ -266,6 +311,8 @@ def _ensure_i23d_worker(): use_safetensors=False, device=args.device, ) + _i23d_on_cpu = False + # else: already on GPU, nothing to do def _unload_tex_pipeline(): @@ -460,12 +507,10 @@ def generation_all( text_path = os.path.join(save_folder, f'textured_mesh.obj') - # In low_vram_mode: fully delete shape model from RAM before loading texture - # pipeline. With only 16GB system RAM, keeping i23d in CPU RAM (~7GB) while - # loading tex (~7GB) would exceed available memory and trigger OOM Killer. + # In low_vram_mode: adaptively offload shape model (CPU or delete based on + # available RAM), then load texture pipeline. if args.low_vram_mode: - _unload_i23d_worker() - _ensure_tex_pipeline() + _prepare_for_tex() path_textured = tex_pipeline(mesh_path=path, image_path=image, output_mesh_path=text_path, save_glb=False) diff --git a/hy3dshape/hy3dshape/rembg.py b/hy3dshape/hy3dshape/rembg.py index bd94de3..92d4d79 100644 --- a/hy3dshape/hy3dshape/rembg.py +++ b/hy3dshape/hy3dshape/rembg.py @@ -13,12 +13,21 @@ # by Tencent in accordance with TENCENT HUNYUAN COMMUNITY LICENSE AGREEMENT. from PIL import Image +import onnxruntime as ort from rembg import remove, new_session class BackgroundRemover(): def __init__(self, model_name: str = "bria-rmbg"): - self.session = new_session(model_name) + # Force CPU-only execution for onnxruntime to prevent CUDA arena + # from consuming ~12GB+ VRAM that PyTorch models need. + # Background removal is lightweight and runs fast on CPU. + _orig = ort.get_device + ort.get_device = lambda: "CPU" + try: + self.session = new_session(model_name) + finally: + ort.get_device = _orig def __call__(self, image: Image.Image): output = remove(image, session=self.session, bgcolor=[255, 255, 255, 0])