// VRM-aware character loader, powered by @pixiv/three-vrm
// Drop-in replacement for character-loader.jsx — same external API, but now:
//   • Loads .vrm (and falls back to plain .glb / .gltf gracefully)
//   • Auto-fixes VRM 0.x backward-facing bug via VRMUtils.rotateVRM0()
//   • Native lip-sync via expressionManager (visemes aa/ih/ou/ee/oh)
//   • Native blink + emotion expressions (happy/sad/angry/relaxed/surprised)
//   • Native head look-at via vrm.lookAt
//   • Spring-bone physics for hair / sash / fur (auto-updated each frame)
//   • Mixamo .glb animation retargeting onto the VRM normalized humanoid
//
// Public API (matches CharacterLoader exactly so app.jsx works unchanged):
//   { group, inner, update, setMouthOpen, setLookAt, triggerGesture,
//     setExpression, setEmotion, setState, hasAnimation, hasClipIdle,
//     loadAnimationFromBuffer, isGLB, isVRM, animationNames, mouthMode,
//     diagnostics }

const VRMCharacterLoader = (() => {

  // Mixamo bone name → VRM normalized humanoid bone name
  // Built programmatically: the six core bones are listed once, the per-side
  // limb bones and the five fingers (three segments each) are expanded for
  // 'Left' and 'Right'. VRM naming quirks encoded here: the thumb chain is
  // Metacarpal/Proximal/Distal while every other finger is
  // Proximal/Intermediate/Distal, and Mixamo's "Pinky" is VRM's "Little".
  const MIXAMO_TO_VRM = (() => {
    const map = {
      'Hips': 'hips',
      'Spine': 'spine',
      'Spine1': 'chest',
      'Spine2': 'upperChest',
      'Neck': 'neck',
      'Head': 'head',
    };
    const FINGER_SEGMENTS = {
      Thumb: ['ThumbMetacarpal', 'ThumbProximal', 'ThumbDistal'],
      Index: ['IndexProximal', 'IndexIntermediate', 'IndexDistal'],
      Middle: ['MiddleProximal', 'MiddleIntermediate', 'MiddleDistal'],
      Ring: ['RingProximal', 'RingIntermediate', 'RingDistal'],
      Pinky: ['LittleProximal', 'LittleIntermediate', 'LittleDistal'],
    };
    for (const side of ['Left', 'Right']) {
      const vrmSide = side.toLowerCase();
      map[`${side}Shoulder`] = `${vrmSide}Shoulder`;
      map[`${side}Arm`] = `${vrmSide}UpperArm`;
      map[`${side}ForeArm`] = `${vrmSide}LowerArm`;
      map[`${side}Hand`] = `${vrmSide}Hand`;
      map[`${side}UpLeg`] = `${vrmSide}UpperLeg`;
      map[`${side}Leg`] = `${vrmSide}LowerLeg`;
      map[`${side}Foot`] = `${vrmSide}Foot`;
      map[`${side}ToeBase`] = `${vrmSide}Toes`;
      // Mixamo finger bones are numbered 1..3 per finger (LeftHandThumb1 …)
      for (const [finger, segments] of Object.entries(FINGER_SEGMENTS)) {
        segments.forEach((segment, i) => {
          map[`${side}Hand${finger}${i + 1}`] = `${vrmSide}${segment}`;
        });
      }
    }
    return map;
  })();

  // UE5 / UE4 Mannequin bone name → VRM normalized humanoid bone name.
  // UE5 mannequin (Manny/Quinn) uses snake_case with _l / _r suffixes.
  // UE5 has 5 spine bones (vs Mixamo's 3) — we map spine_01..03 to VRM
  // spine/chest/upperChest, and let spine_04/05 fall through to skip list.
  const UE_MANNEQUIN_TO_VRM = (() => {
    const map = {
      'pelvis': 'hips',
      'spine_01': 'spine',
      'spine_02': 'chest',
      'spine_03': 'upperChest',
      // spine_04, spine_05 and neck_02 have no VRM equivalent — they are
      // intentionally absent here and handled by the skip list instead.
      'neck_01': 'neck',
      'head': 'head',
    };
    // Per-side limb bones: UE base name → VRM suffix (gets 'left'/'right' prepended).
    const LIMB_BONES = {
      clavicle: 'Shoulder',
      upperarm: 'UpperArm',
      lowerarm: 'LowerArm',
      hand: 'Hand',
      thigh: 'UpperLeg',
      calf: 'LowerLeg',
      foot: 'Foot',
      ball: 'Toes',
    };
    // UE5 fingers are thumb/index/middle/ring/pinky _01.._03; VRM's thumb
    // chain is Metacarpal/Proximal/Distal, other fingers are
    // Proximal/Intermediate/Distal, and "pinky" maps to VRM "Little".
    const FINGER_SEGMENTS = {
      thumb: ['ThumbMetacarpal', 'ThumbProximal', 'ThumbDistal'],
      index: ['IndexProximal', 'IndexIntermediate', 'IndexDistal'],
      middle: ['MiddleProximal', 'MiddleIntermediate', 'MiddleDistal'],
      ring: ['RingProximal', 'RingIntermediate', 'RingDistal'],
      pinky: ['LittleProximal', 'LittleIntermediate', 'LittleDistal'],
    };
    for (const [suffix, vrmSide] of [['_l', 'left'], ['_r', 'right']]) {
      for (const [ueBone, vrmBone] of Object.entries(LIMB_BONES)) {
        map[`${ueBone}${suffix}`] = `${vrmSide}${vrmBone}`;
      }
      for (const [finger, segments] of Object.entries(FINGER_SEGMENTS)) {
        segments.forEach((segment, i) => {
          map[`${finger}_0${i + 1}${suffix}`] = `${vrmSide}${segment}`;
        });
      }
    }
    return map;
  })();

  // Bone names to drop entirely from incoming clips. UE5 ships skeletons with
  // twist bones (skin-deformation aids), IK control bones (not part of the
  // animated hierarchy), and weapon/prop bones — none of these have VRM
  // equivalents and including them just produces dead AnimationMixer tracks.
  const SKIP_BONE_PATTERNS = [
    /_twist_/i,                  // upperarm_twist_01_l, lowerarm_twist_02_r, etc.
    /^ik_/i,                     // ik_foot_l, ik_hand_gun, ik_foot_root
    /^weapon_/i,                 // weapon_l, weapon_r (UE5 mannequin has these)
    /^root$/i,                   // UE5 root motion bone — handled separately
    /^spine_0[45]$/i,            // UE5's extra spine bones (no VRM equivalent)
    /^neck_02$/i,                // UE5's second neck bone (VRM has one neck)
    // BUG FIX: was /_metacarpal_/i, which requires a LEADING underscore and
    // therefore never matched the bare "metacarpal_l" naming this entry was
    // documented to cover. Also accept the name at the start of the bone.
    // (Superset of the old pattern, so thumb_metacarpal_l still matches.)
    /(^|_)metacarpal_/i,         // metacarpal_l, thumb_metacarpal_l, etc.
  ];


  // Dump a one-time console summary of what this VRM supports: expressions,
  // humanoid bones, spring-bone count, look-at, and any baked-in animations.
  // Purely informational — reads the vrm/gltf, writes nothing.
  function logDiagnostics(vrm, gltf) {
    const mgr = vrm.expressionManager;
    const exprs = mgr ? mgr.expressions.map((e) => e.expressionName) : [];

    let humanBones = [];
    if (vrm.humanoid) {
      // three-vrm exposes humanBones publicly; older builds kept _rawHumanBones.
      humanBones = Object.keys(vrm.humanoid.humanBones || vrm.humanoid._rawHumanBones || {});
    }

    // Spring-bone container shape differs across three-vrm versions.
    const springCount =
      vrm.springBoneManager?.joints?.size ??
      vrm.springBoneManager?.springBones?.length ??
      0;

    console.group('[VRMCharacterLoader] VRM diagnostics');
    console.log('VRM meta version:', vrm.meta?.metaVersion ?? '0.0 (legacy)');
    console.log('VRM meta:', vrm.meta?.title || vrm.meta?.name || '(no title)',
                'by', vrm.meta?.author || '(no author)');
    console.log('Humanoid bones:', humanBones.length, humanBones.slice(0, 10), '…');
    console.log('Expressions:', exprs.length, exprs);
    console.log('Spring bones:', springCount);
    console.log('LookAt available:', !!vrm.lookAt);
    console.log('Animations baked into file:', (gltf.animations || []).map(a => a.name));
    console.groupEnd();
  }

  // Load a character from a URL or an ArrayBuffer. VRM files get the full
  // wrap() treatment; plain glTF/GLB files are delegated to the legacy
  // CharacterLoader. Throws if the loaders aren't on window yet, or if
  // neither url nor arrayBuffer was provided.
  async function load({ url, arrayBuffer }) {
    const Loader = window.GLTFLoader || THREE.GLTFLoader;
    if (!Loader) throw new Error('GLTFLoader not ready');
    const VRMLoaderPlugin = window.VRMLoaderPlugin;
    const VRMUtils = window.VRMUtils;
    if (!VRMLoaderPlugin || !VRMUtils) {
      throw new Error('three-vrm not ready — make sure VRMLoaderPlugin is loaded before this script');
    }

    const loader = new Loader();
    // Keep a normalized humanoid skeleton — it makes Mixamo retargeting reliable.
    loader.register((parser) => new VRMLoaderPlugin(parser, { autoUpdateHumanBones: true }));

    // Adapt GLTFLoader's callback API to a promise.
    const gltf = await new Promise((resolve, reject) => {
      if (arrayBuffer) {
        loader.parse(arrayBuffer, '', resolve, reject);
      } else if (url) {
        loader.load(url, resolve, undefined, reject);
      } else {
        reject(new Error('Need arrayBuffer or url'));
      }
    });

    const vrm = gltf.userData.vrm;
    if (!vrm) {
      // Plain glTF — hand off to the original CharacterLoader so legacy
      // RPM / Mixamo characters keep working alongside VRM ones.
      console.log('[VRMCharacterLoader] Not a VRM file — delegating to CharacterLoader');
      return CharacterLoader.load({ arrayBuffer, url });
    }

    logDiagnostics(vrm, gltf);

    // VRM 0.x avatars natively face -Z; rotateVRM0 spins them 180° around Y
    // so they face +Z like VRM 1.0 models (no-op on VRM 1.0).
    VRMUtils.rotateVRM0(vrm);

    // Best-effort optimizations — failures are non-fatal.
    try {
      VRMUtils.removeUnnecessaryVertices(gltf.scene);
    } catch (e) {
      console.warn('removeUnnecessaryVertices failed', e);
    }
    try {
      VRMUtils.combineSkeletons(gltf.scene);
    } catch (e) {
      console.warn('combineSkeletons failed', e);
    }

    // Animated VRMs outgrow their static bounds, so frustum culling would
    // make meshes pop out mid-animation; also opt every mesh into shadows.
    vrm.scene.traverse((obj) => {
      if (!obj.isMesh) return;
      obj.frustumCulled = false;
      obj.castShadow = true;
      obj.receiveShadow = true;
    });

    return wrap(vrm, gltf);
  }

  // Builds the public API object around a parsed VRM. (Closure state for the
  // whole character lives in this function.)
  function wrap(vrm, gltf) {
    // ────── Outer / inner group split (matches CharacterLoader contract) ──────
    const group = new THREE.Group();
    const inner = new THREE.Group();
    group.add(inner);
    inner.add(vrm.scene);

    // ────── Center & scale: feet on ground, ~1.7 m tall, centered XZ ──────
    const box = new THREE.Box3().setFromObject(vrm.scene);
    const size = new THREE.Vector3(); box.getSize(size);
    const targetH = 1.7;
    // Math.max guards against a degenerate (zero-height) bounding box.
    const baseScale = targetH / Math.max(0.001, size.y);
    vrm.scene.scale.setScalar(baseScale);
    // Recompute bounds AFTER scaling — the offsets below are in scaled units.
    const box2 = new THREE.Box3().setFromObject(vrm.scene);
    const c2 = new THREE.Vector3(); box2.getCenter(c2);
    vrm.scene.position.x -= c2.x;
    vrm.scene.position.z -= c2.z;
    vrm.scene.position.y -= box2.min.y; // feet land exactly on y = 0

    // ────── Animation system ──────
    // mixer is created lazily (from baked clips below, or on first loaded
    // animation). `actions` is keyed by lowercased baked-clip name, by slot
    // name (idle/talking/…), or by 'lib:<name>' for library clips.
    let mixer = null;
    const actions = {};

    // ────── Default rest pose (so we don't show T-pose at startup) ──────
    // VRoid / Mixamo / RPM authors all ship VRMs in T-pose — that's the spec.
    // We nudge the arms ~75° down so the character looks natural before any
    // animation clip loads. When a clip plays, the AnimationMixer drives these
    // bones directly and overrides our values; when no clip plays, we re-apply
    // the rest pose each frame in update() so it never drifts back to T-pose.
    // Any of these bone nodes may be null (model without that bone).
    const lUpperArm = vrm.humanoid?.getNormalizedBoneNode?.('leftUpperArm');
    const rUpperArm = vrm.humanoid?.getNormalizedBoneNode?.('rightUpperArm');
    const lLowerArm = vrm.humanoid?.getNormalizedBoneNode?.('leftLowerArm');
    const rLowerArm = vrm.humanoid?.getNormalizedBoneNode?.('rightLowerArm');
    // Euler rotation targets (radians) for the relaxed arms-down pose.
    const REST = {
      lUpperZ: Math.PI * 0.42,    // ~75° → arm rotates down on left side
      rUpperZ: -Math.PI * 0.42,
      lUpperY: 0.05,              // tiny inward so arms hug body
      rUpperY: -0.05,
      lLowerY: -0.1,              // slight elbow bend for relaxed look
      rLowerY: 0.1,
    };
    // Overwrite the four arm-bone rotations with the REST values above.
    // Idempotent and safe to call every frame (set, not accumulate).
    function applyRestPose() {
      if (lUpperArm) {
        lUpperArm.rotation.set(0, REST.lUpperY, REST.lUpperZ);
      }
      if (rUpperArm) {
        rUpperArm.rotation.set(0, REST.rUpperY, REST.rUpperZ);
      }
      if (lLowerArm) lLowerArm.rotation.set(0, REST.lLowerY, 0);
      if (rLowerArm) rLowerArm.rotation.set(0, REST.rLowerY, 0);
    }
    applyRestPose();

    // True when at least one registered action is running with a meaningful
    // weight (> 0.01) — used to decide if the procedural rest pose should run.
    function isAnyActionPlaying() {
      return Object.values(actions).some(
        (action) => action && action.isRunning() && action.getEffectiveWeight() > 0.01
      );
    }

    // Baked-in clips: register each under its lowercased name and start the
    // best idle candidate immediately so the character never idles in T-pose.
    if (gltf.animations && gltf.animations.length) {
      mixer = new THREE.AnimationMixer(vrm.scene);
      for (const clip of gltf.animations) {
        actions[clip.name.toLowerCase()] = mixer.clipAction(clip);
      }
      const idle = actions['idle'] || actions['breathing'] || actions[Object.keys(actions)[0]];
      if (idle) idle.play();
    }

    // ────── Detect available expressions for both VRM 0.x and VRM 1.0 ──────
    // VRM 1.0 uses 'aa','ih','ou','ee','oh','blink','happy','sad','angry','relaxed','surprised'
    // VRM 0.x uses 'a','i','u','e','o','blink','fun','sorrow','angry','joy','surprised'
    // three-vrm normalizes most of these to the 1.0 names automatically; we still
    // probe both to be safe across producers (VRoid Studio, Vroid Hub, Replikant…)
    const exprMgr = vrm.expressionManager;
    // Return the first candidate name this model actually has, else null.
    function findExpr(...candidates) {
      if (!exprMgr) return null;
      for (const c of candidates) {
        if (exprMgr.getExpression(c)) return c;
      }
      return null;
    }
    // Resolved expression names (null when the model lacks the expression).
    const exprMouth = findExpr('aa', 'a', 'A', 'ah');
    const exprBlink = findExpr('blink', 'Blink');
    const exprHappy = findExpr('happy', 'fun', 'Fun', 'joy', 'Joy');
    const exprSad   = findExpr('sad', 'sorrow', 'Sorrow');
    const exprAngry = findExpr('angry', 'Angry');
    const exprRelax = findExpr('relaxed', 'neutral', 'Neutral');
    const exprSurp  = findExpr('surprised', 'Surprised');

    // ────── Look-at target (parented to the scene root once available) ──────
    // update() attaches it to the scene root lazily so world matrices resolve.
    const lookAtTarget = new THREE.Object3D();
    lookAtTarget.position.set(0, 1.6, 5); // default = looking forward at camera height
    if (vrm.lookAt) vrm.lookAt.target = lookAtTarget;

    // ────── State ──────
    // Current vs. target values; update() eases current toward target.
    let mouthOpen = 0, mouthOpenTarget = 0;
    let blinkAmt = 0, blinkTimer = 1.5 + Math.random() * 3; // seconds to next blink
    const emotion = { happy: 0, sad: 0, angry: 0, surprised: 0, relaxed: 0 };
    const emotionTarget = { happy: 0, sad: 0, angry: 0, surprised: 0, relaxed: 0 };
    let _t = Math.random() * 100; // phase offset so multiple characters desync

    // ────── Public API ──────
    // Set the lip-sync amplitude target, clamped into [0, 1]; update() eases
    // the visible mouth value toward it.
    function setMouthOpen(amp) {
      mouthOpenTarget = Math.min(1, Math.max(0, amp));
    }
    // Point the gaze at a world-space position. The stored position is in the
    // local frame of lookAtTarget's parent (world == local until parented).
    function setLookAt(v3) {
      if (lookAtTarget.parent) {
        const local = v3.clone();
        lookAtTarget.parent.worldToLocal(local);
        lookAtTarget.position.copy(local);
      } else {
        lookAtTarget.position.copy(v3);
      }
    }
    // Low-level expression write (clamped to [0, 1]) — bypasses the smoothing
    // used by setEmotion(). Returns false when the expression doesn't exist.
    function setExpression(name, value) {
      if (!exprMgr?.getExpression(name)) return false;
      exprMgr.setValue(name, Math.min(1, Math.max(0, value)));
      return true;
    }
    // Set a smoothed emotion target (preferred over setExpression).
    // Recognized names: happy, sad, angry, surprised, relaxed; others ignored.
    function setEmotion(name, value) {
      if (!(name in emotionTarget)) return;
      emotionTarget[name] = Math.min(1, Math.max(0, value));
    }
    // Play a one-shot gesture: fade in, run once, hold the final pose, then
    // fade back out near the end of the clip. Returns false for unknown names.
    function triggerGesture(name) {
      const a = actions[name?.toLowerCase?.()];
      if (!a) return false;
      a.reset();
      a.setLoop(THREE.LoopOnce, 1);
      a.clampWhenFinished = true;
      a.fadeIn(0.2).play();
      // BUG FIX: the fade-out used to fire at a fixed 1100 ms, which cut off
      // any gesture clip longer than ~1.5 s mid-motion. Schedule it relative
      // to the clip's own duration instead (never earlier than 200 ms).
      const durationMs = (a.getClip?.()?.duration ?? 1.5) * 1000;
      setTimeout(() => a.fadeOut(0.4), Math.max(200, durationMs - 400));
      return true;
    }
    // Crossfade to a named looping state (e.g. 'idle', 'talking', 'listening').
    // CONSISTENCY FIX: baked-in clips are registered under lowercased names and
    // triggerGesture() lowercases its lookup, but setState() did not — so e.g.
    // setState('Idle') silently failed for baked clips. Exact-key lookup still
    // wins (slots like 'gestureA' keep their case), then fall back to lowercase.
    function setState(state) {
      const target = actions[state]
        ?? actions[typeof state === 'string' ? state.toLowerCase() : state];
      if (!target) return false;
      const fadeTime = 0.3;
      for (const k in actions) {
        const a = actions[k];
        if (a === target) continue; // compare identity, not key, for the fallback case
        if (a.isRunning() && a.getEffectiveWeight() > 0) a.fadeOut(fadeTime);
      }
      target.reset();
      target.setLoop(THREE.LoopRepeat, Infinity);
      target.fadeIn(fadeTime).play();
      return true;
    }
    // True when an action is registered under the given slot/name.
    function hasAnimation(slot) { return Boolean(actions[slot]); }

    // ────── Update loop ──────
    // Per-frame driver; dt is the frame delta in seconds. Ordering matters:
    // procedural layers (lip-sync, blink, emotions, rest pose) write values
    // first, then the mixer overrides bones if a clip is playing, then
    // vrm.update() commits expressions / look-at / spring-bone physics.
    function update(dt) {
      _t += dt;

      // Late-attach lookAt target to scene root so its world matrix updates
      if (!lookAtTarget.parent) {
        let root = group;
        while (root.parent) root = root.parent;
        // Only attach once `group` itself has been added to a scene graph
        // (root === group means we're not in a scene yet — retry next frame).
        if (root && root !== group) root.add(lookAtTarget);
      }

      // Smooth lip-sync amplitude (exponential ease toward the target)
      mouthOpen += (mouthOpenTarget - mouthOpen) * Math.min(1, dt * 22);
      if (exprMgr && exprMouth) {
        exprMgr.setValue(exprMouth, mouthOpen);
      }

      // Auto-blink (procedural): blinkTimer counts down to the next blink;
      // while expired, blinkAmt ramps up; once full, the timer re-arms and
      // blinkAmt decays back to 0.
      if (exprMgr && exprBlink) {
        blinkTimer -= dt;
        if (blinkTimer <= 0) {
          blinkAmt = Math.min(1, blinkAmt + dt * 14);
          if (blinkAmt >= 1) blinkTimer = 2 + Math.random() * 4;
        } else if (blinkAmt > 0) {
          blinkAmt = Math.max(0, blinkAmt - dt * 10);
        }
        // triangle wave (open → closed → open)
        // NOTE(review): blinkAmt traverses the triangle on the way up AND on
        // the way back down after the timer re-arms, so each trigger appears
        // to produce two eye closures — confirm whether the double blink is
        // intended.
        const v = blinkAmt > 0.5 ? (1 - blinkAmt) * 2 : blinkAmt * 2;
        exprMgr.setValue(exprBlink, v);
      }

      // Emotion smoothing (8 Hz response)
      for (const k in emotion) {
        emotion[k] += (emotionTarget[k] - emotion[k]) * Math.min(1, dt * 8);
      }
      if (exprMgr) {
        if (exprHappy) exprMgr.setValue(exprHappy, emotion.happy);
        if (exprSad)   exprMgr.setValue(exprSad,   emotion.sad);
        if (exprAngry) exprMgr.setValue(exprAngry, emotion.angry);
        if (exprSurp)  exprMgr.setValue(exprSurp,  emotion.surprised);
        if (exprRelax) exprMgr.setValue(exprRelax, emotion.relaxed);
      }

      // Procedural breathing + rest pose (only when no clip is driving bones)
      if (!isAnyActionPlaying()) {
        // Re-apply the arm rest pose so we never show T-pose
        applyRestPose();
        // Gentle chest sway
        const breath = Math.sin(_t * 1.3) * 0.012;
        const spine = vrm.humanoid?.getNormalizedBoneNode?.('spine')
                   || vrm.humanoid?.getRawBoneNode?.('spine');
        if (spine) {
          spine.rotation.x = breath * 0.5;
        }
        // Subtle side-to-side body weight shift
        const hips = vrm.humanoid?.getNormalizedBoneNode?.('hips');
        if (hips) {
          hips.rotation.z = Math.sin(_t * 0.7) * 0.015;
        }
      }

      if (mixer) mixer.update(dt);

      // CRITICAL: this drives lookAt, expressions, AND spring bones
      vrm.update(dt);
    }

    // ────── External animation retargeting onto VRM humanoid ──────
    // Accepts both .glb / .gltf (via GLTFLoader) and .fbx (via FBXLoader) so
    // UE5 / Mixamo / Maya exports drop in without conversion. The retargeter
    // then maps Mixamo or UE5 mannequin bone names → VRM normalized humanoid.
    //
    // Params: arrayBuffer — raw file bytes; slotOrName — either a known slot
    // ('idle'|'talking'|'listening'|'gestureA'|'gestureB') or a library clip
    // name; fileName — optional original filename (used for .fbx detection).
    // Returns { clip, action, name }. Throws when no loader is available or
    // the file contains no animation clips. Only the FIRST clip is used.
    async function loadAnimationFromBuffer(arrayBuffer, slotOrName, fileName) {
      // FBX is detected by extension first, then by binary magic number.
      const isFBX = /\.fbx$/i.test(fileName || slotOrName || '')
        || sniffFBXMagic(arrayBuffer);

      let clip;
      let sourceRestPoses = null;
      let sourceBonePositions = null;

      if (isFBX) {
        if (!window.FBXLoader) throw new Error('FBXLoader not ready');
        const fbxLoader = new window.FBXLoader();
        const root = fbxLoader.parse(arrayBuffer, '');
        if (!root.animations || !root.animations.length) {
          throw new Error('No animation clips found in FBX');
        }
        clip = root.animations[0];
        const captured = collectRestPoses(root);
        sourceRestPoses = captured.restPoses;
        sourceBonePositions = captured.bonePositions;
      } else {
        const Loader = window.GLTFLoader || THREE.GLTFLoader;
        const animLoader = new Loader();
        const animGltf = await new Promise((res, rej) =>
          animLoader.parse(arrayBuffer, '', res, rej));
        if (!animGltf.animations || !animGltf.animations.length) {
          throw new Error('No animation clips found in glTF');
        }
        clip = animGltf.animations[0];
        const captured = collectRestPoses(animGltf.scene);
        sourceRestPoses = captured.restPoses;
        sourceBonePositions = captured.bonePositions;
        // Mixamo glTFs often have empty scene roots — fall back to parser cache
        // (load every node dependency and harvest quaternions/positions there).
        if (Object.keys(sourceRestPoses).length === 0 && animGltf.parser) {
          try {
            const parser = animGltf.parser;
            const nodeCount = parser.json?.nodes?.length || 0;
            const nodes = await Promise.all(
              Array.from({ length: nodeCount }, (_, i) => parser.getDependency('node', i))
            );
            for (const n of nodes) {
              if (!n?.name || !n.quaternion) continue;
              // Strip Mixamo rig prefixes, same normalization as collectRestPoses
              const cleanName = n.name
                .replace(/^mixamorig:?/i, '')
                .replace(/^mixamorig\d+:?/i, '');
              if (!sourceRestPoses[cleanName]) {
                sourceRestPoses[cleanName] = n.quaternion.clone();
                if (n.position) sourceBonePositions[cleanName] = n.position.clone();
              }
            }
          } catch (e) { /* silent — best-effort fallback; retarget proceeds without rest poses */ }
        }
      }

      if (!mixer) mixer = new THREE.AnimationMixer(vrm.scene);
      const retargeted = retargetClipToVRM(clip, vrm, sourceRestPoses, sourceBonePositions);
      // Set the clip name to slotOrName (the library's internal key, no
      // extension) — NOT fileName (with .FBX). Otherwise the action gets
      // registered under "lib:foo.FBX" but the library queries "lib:foo".
      retargeted.name = slotOrName || fileName || retargeted.name;
      const action = mixer.clipAction(retargeted);

      // Library mode (named clip, not a fixed slot) — register and return clip handle.
      // Slot mode (slot is one of 'idle'|'talking'|'listening'|'gestureA'|'gestureB')
      // — register under that slot for legacy compatibility.
      const KNOWN_SLOTS = ['idle', 'talking', 'listening', 'gestureA', 'gestureB'];
      if (KNOWN_SLOTS.includes(slotOrName)) {
        actions[slotOrName] = action;
        if (slotOrName === 'idle') setState('idle'); // new idle takes over immediately
      } else {
        // Library mode — keyed by the same name the library uses
        const libKey = `lib:${slotOrName || retargeted.name}`;
        actions[libKey] = action;
      }
      return { clip: retargeted, action, name: retargeted.name };
    }

    // FBX magic-number sniffer (covers binary FBX files w/o file extension).
    // Returns false for buffers shorter than 23 bytes or on any read error.
    function sniffFBXMagic(buf) {
      const MAGIC = 'Kaydara FBX Binary  ';
      try {
        const bytes = new Uint8Array(buf, 0, 23);
        let header = '';
        for (let i = 0; i < MAGIC.length; i++) {
          header += String.fromCharCode(bytes[i]);
        }
        return header === MAGIC;
      } catch (e) {
        return false;
      }
    }

    // Walk the parsed source and capture each bone's rest-pose quaternion
    // AND position. Position offsets reveal the bone's "long axis" direction
    // — needed for axis-frame alignment between rigs that orient bones
    // differently (Mixamo: bones extend +Y; bear VRM: bones extend ±X).
    function collectRestPoses(rootObj) {
      const restPoses = {};
      const bonePositions = {};
      const seenIds = new Set();
      function visit(node) {
        if (!node || seenIds.has(node.uuid)) return;
        seenIds.add(node.uuid);
        if (node.name && node.quaternion) {
          const cleanName = node.name
            .replace(/^mixamorig:?/i, '')
            .replace(/^mixamorig\d+:?/i, '');
          if (!restPoses[cleanName]) {
            restPoses[cleanName] = node.quaternion.clone();
            if (node.position) bonePositions[cleanName] = node.position.clone();
          }
        }
        if (node.children) {
          for (const c of node.children) visit(c);
        }
      }
      if (rootObj?.traverse) rootObj.traverse(visit);
      rootObj?.traverse?.((n) => {
        if (n.isSkinnedMesh && n.skeleton?.bones) {
          for (const b of n.skeleton.bones) visit(b);
        }
      });
      return { restPoses, bonePositions };
    }

    // Play a registered library clip by name (with optional fade transition).
    // Used by the Behavior Director to auto-cycle between variants.
    // Returns false when no clip is registered under `lib:<name>`.
    function playLibraryClip(name, { fade = 0.4, loop = true } = {}) {
      const key = `lib:${name}`;
      const target = actions[key];
      if (!target) return false;

      // Fade out everything else that's currently contributing.
      for (const [k, action] of Object.entries(actions)) {
        if (k === key || !action) continue;
        if (action.isRunning() && action.getEffectiveWeight() > 0.01) {
          action.fadeOut(fade);
        }
      }

      target.reset();
      if (loop) {
        target.setLoop(THREE.LoopRepeat, Infinity);
      } else {
        target.setLoop(THREE.LoopOnce, 1);
        target.clampWhenFinished = true; // hold last pose after a one-shot
      }
      target.fadeIn(fade).play();
      return true;
    }

    // Names of all registered library clips (the 'lib:' prefix stripped).
    function listLibraryClips() {
      const names = [];
      for (const key of Object.keys(actions)) {
        if (key.startsWith('lib:')) names.push(key.slice(4));
      }
      return names;
    }

    // Stop and deregister every 'lib:'-prefixed action; slot actions untouched.
    function clearLibrary() {
      Object.keys(actions)
        .filter((key) => key.startsWith('lib:'))
        .forEach((key) => {
          actions[key].stop();
          delete actions[key];
        });
    }

    // Unload a single slot (idle/talking/listening/gestureA/gestureB).
    // Stops the action and removes it; once nothing is playing, the update()
    // loop's isAnyActionPlaying() check falls back to the procedural rest
    // pose automatically. Used by the per-slot Reset button in the panel.
    function unloadSlot(slot) {
      const action = actions[slot];
      if (!action) return false;
      action.stop();
      try {
        // Drop the mixer's cached bindings so the clip is released cleanly.
        mixer?.uncacheAction(action.getClip());
      } catch (e) {
        // best-effort — stale mixer cache state is harmless here
      }
      delete actions[slot];
      return true;
    }

    // Detect which skeleton convention a clip is using by sampling its bone
    // names. Returns 'mixamo' | 'ue_mannequin' | 'unknown'. Only the first
    // 30 track names are examined — plenty for a per-clip decision.
    function detectSkeletonType(clip) {
      let mixamoHits = 0;
      let ueHits = 0;
      for (const track of clip.tracks.slice(0, 30)) {
        // "BoneName.quaternion" → "BoneName", minus any Mixamo rig prefix
        const bone = track.name
          .split('.')[0]
          .replace(/^mixamorig:?/i, '')
          .replace(/^mixamorig\d+:?/i, '');
        if (MIXAMO_TO_VRM[bone]) mixamoHits += 1;
        if (UE_MANNEQUIN_TO_VRM[bone]) ueHits += 1;
      }
      if (ueHits > mixamoHits && ueHits >= 3) return 'ue_mannequin';
      return mixamoHits >= 3 ? 'mixamo' : 'unknown';
    }

    function retargetClipToVRM(clip, vrm, sourceRestPoses, sourceBonePositions) {
      const skeletonType = detectSkeletonType(clip);
      const boneMap = skeletonType === 'ue_mannequin'
        ? UE_MANNEQUIN_TO_VRM
        : MIXAMO_TO_VRM;

      // ── First-frame baseline retargeting ─────────────────────────────
      // The source bone's "rest pose" stored in the FBX/glTF doesn't reflect
      // where the animator wanted the bone visually — it's just the rigger's
      // convention (e.g. Mixamo always encodes T-pose). For breathing/idle
      // animations, the animator effectively starts with arm at "hanging"
      // pose, which is encoded as a ~75° rotation away from the rest-pose
      // T-pose. If we subtract the rest pose (or worse, pass through raw),
      // we get the bear arms locked at +75° forward of natural.
      //
      // The right baseline is the animation's OWN first keyframe — wherever
      // the animator chose to start. We extract pure motion as:
      //     delta(t) = Q_keyframe(0)⁻¹ × Q_keyframe(t)
      // Then apply on top of bear's natural rest pose:
      //     Q_target(t) = R_target_rest × delta(t)
      //
      // For bear (identity rest), Q_target = delta. The animation runs as
      // small motion on top of bear's natural arms-down pose. No axis
      // alignment needed — both rigs receive the same motion relative to
      // their natural rest.
      //
      // Caveat: this works for idles/talking/listening where the first
      // frame IS the visual rest pose. For animations that START with a
      // gesture (e.g. waving while standing), the bear will start at
      // bear-rest, not at the gesture — minor visual discontinuity for the
      // first crossfade, but cleaner overall.

      let mappedCount = 0, droppedCount = 0, skippedCount = 0;
      let baselined = 0;
      const newTracks = [];
      const _tmpQ = new THREE.Quaternion();

      for (const t of clip.tracks) {
        // Track names: "BoneName.quaternion" or "BoneName.position" etc.
        const m = t.name.match(/^(.+?)\.(.+)$/);
        if (!m) { droppedCount++; continue; }
        let [, boneName, prop] = m;

        // Strip Mixamo prefix variants
        boneName = boneName
          .replace(/^mixamorig:?/i, '')
          .replace(/^mixamorig\d+:?/i, '');

        // Skip twist bones, IK control bones, weapon bones, extra spines etc.
        if (SKIP_BONE_PATTERNS.some(re => re.test(boneName))) {
          skippedCount++;
          continue;
        }

        // Lookup VRM equivalent in the active bone map
        const vrmKey = boneMap[boneName];
        if (!vrmKey) {
          // Not a humanoid bone we know how to map. For unknown skeletons
          // pass through as-is (best-effort); for known skeletons drop
          // (we already explicitly mapped what we want).
          if (skeletonType === 'unknown') {
            newTracks.push(t.clone());
          } else {
            droppedCount++;
          }
          continue;
        }

        let vrmNode = null;
        if (vrm.humanoid) {
          vrmNode = vrm.humanoid.getNormalizedBoneNode?.(vrmKey)
                 || vrm.humanoid.getRawBoneNode?.(vrmKey);
        }
        if (!vrmNode) {
          // VRM doesn't have this bone (e.g. fingers on a no-finger model).
          droppedCount++;
          continue;
        }

        const newTrack = t.clone();
        newTrack.name = `${vrmNode.name}.${prop}`;

        // ── Skeleton bind-pose workaround (hips + legs) ──────────────────
        // Mocap skeletons (UE5 mannequin, Mixamo) are human-proportioned with
        // long legs (~90 cm) and a specific hip rest pose. Non-human VRMs
        // like our bears have very different proportions (~30 cm legs, wider
        // hips, lower center of mass). When we apply the source's hip/leg
        // rotations to the target, the math evaluates correctly but the
        // visual result is grotesque — bear lifts foot to chest height,
        // sinks below ground, spins in place, etc.
        //
        // For talking-in-place kiosk animations the lower body should stay
        // rooted anyway, so we drop hips + legs entirely. Upper body (spine,
        // chest, neck, head, shoulders, arms, hands, fingers) still animates
        // because its motion is computed RELATIVE to its parent — and as
        // long as the chain of relative motions matches, the upper body
        // looks correct. We trade locomotion (no walking) for stability,
        // which is the right tradeoff for a chatbot kiosk.
        const LOWER_BODY = new Set([
          'hips',
          'leftUpperLeg', 'leftLowerLeg', 'leftFoot', 'leftToes',
          'rightUpperLeg', 'rightLowerLeg', 'rightFoot', 'rightToes',
        ]);
        if (LOWER_BODY.has(vrmKey)) {
          droppedCount++;
          continue;
        }

        // Drop position tracks for everything (we already excluded hips above)
        if (prop === 'position') {
          droppedCount++;
          continue;
        }

        // ── First-frame baseline + motion damping for upper body ────
        // 1) Subtract first keyframe → pure motion delta from baseline
        // 2) Damp delta by configurable factor (default 0.55) — Mixamo's
        //    gestures are authored for human-proportioned characters with
        //    long arms. Bear has short arms and rounded torso, so the same
        //    rotation amount makes hands clip into body. Damping scales
        //    rotations down (slerp from identity towards delta).
        // 3) Apply on top of bear's rest pose
        //
        // Applied to ALL upper-body bones (arms + spine chain + head) — for
        // bones like head/neck where Mixamo's first frame may be a non-rest
        // pose (e.g. looking down for Happy_Idle), baseline subtraction is
        // critical to avoid a permanent offset (e.g. the head stuck bowed down).
        const UPPER_BODY_BASELINE = new Set([
          'leftShoulder', 'rightShoulder',
          'leftUpperArm', 'leftLowerArm', 'leftHand',
          'rightUpperArm', 'rightLowerArm', 'rightHand',
          // Spine chain + head — same baseline issue applies
          'spine', 'chest', 'upperChest', 'neck', 'head',
        ]);
        if (prop === 'quaternion' && UPPER_BODY_BASELINE.has(vrmKey)
            && newTrack.values && newTrack.values.length >= 8) {
          const v = newTrack.values;
          const q0 = new THREE.Quaternion(v[0], v[1], v[2], v[3]);
          const q0Inv = q0.clone().invert();
          const tgtNode = vrm.humanoid?.getNormalizedBoneNode?.(vrmKey)
                       || vrm.humanoid?.getRawBoneNode?.(vrmKey);
          const R_tgt = tgtNode?.quaternion?.clone() || new THREE.Quaternion();

          // Damping factor — set window.metaspeakArmDamping to override.
          // 1.0 = full Mixamo motion (gestures may clip body)
          // 0.55 = 55% motion (default — natural for bear proportions)
          // 0.3 = subtle, very calm
          const damping = (typeof window !== 'undefined' && typeof window.metaspeakArmDamping === 'number')
            ? window.metaspeakArmDamping
            : 0.55;
          const _identity = new THREE.Quaternion();

          for (let i = 0; i < v.length; i += 4) {
            _tmpQ.set(v[i], v[i+1], v[i+2], v[i+3]);
            // delta = q0⁻¹ × Q_keyframe (motion relative to first frame)
            _tmpQ.premultiply(q0Inv);
            // Damp: slerp from identity towards delta by damping factor
            if (damping !== 1.0) {
              _tmpQ.slerpQuaternions(_identity, _tmpQ.clone(), damping);
            }
            // Q_target = R_tgt × delta_damped
            _tmpQ.premultiply(R_tgt);
            v[i]   = _tmpQ.x;
            v[i+1] = _tmpQ.y;
            v[i+2] = _tmpQ.z;
            v[i+3] = _tmpQ.w;
          }
          baselined++;
        }

        newTracks.push(newTrack);
        mappedCount++;
      }

      console.log(
        `[VRMCharacterLoader] Retargeted clip "${clip.name}" — skeleton=${skeletonType}, ` +
        `mapped=${mappedCount}, dropped=${droppedCount}, skipped=${skippedCount}, ` +
        `first-frame-baselined=${baselined}`
      );

      const cloned = clip.clone();
      cloned.tracks = newTracks;
      cloned.resetDuration();
      return cloned;
    }

    // ────── Diagnostics published for the panel UI ──────
    const diagnostics = {
      vrmVersion: vrm.meta?.metaVersion ?? '0.0',
      title: vrm.meta?.title || vrm.meta?.name || null,
      author: vrm.meta?.author || null,
      expressionCount: exprMgr ? exprMgr.expressions.length : 0,
      expressionsAvailable: {
        mouth: exprMouth, blink: exprBlink, happy: exprHappy,
        sad: exprSad, angry: exprAngry, relaxed: exprRelax, surprised: exprSurp,
      },
      springBoneCount: vrm.springBoneManager?.joints?.size
                    ?? vrm.springBoneManager?.springBones?.length
                    ?? 0,
      humanoidBoneCount: Object.keys(
        vrm.humanoid?.humanBones || vrm.humanoid?._rawHumanBones || {}
      ).length,
      hasLookAt: !!vrm.lookAt,
      animationCount: Object.keys(actions).length,
      mouthMode: exprMouth ? `expression:${exprMouth}` : 'none',
    };

    return {
      group, inner,
      update, setMouthOpen, setLookAt, triggerGesture, setExpression, setEmotion,
      setState, hasAnimation,
      isGLB: true,
      isVRM: true,
      hasClipIdle: !!actions.idle,
      animationNames: Object.keys(actions),
      mouthMode: diagnostics.mouthMode,
      diagnostics,
      loadAnimationFromBuffer,
      unloadSlot,
      // Library mode (multi-clip, behavior-director driven)
      playLibraryClip,
      listLibraryClips,
      clearLibrary,
      // Direct VRM access for advanced users
      _vrm: vrm,
    };
  }

  return { load };
})();

// Export as a browser global so plain <script>-tag consumers (e.g. app.jsx,
// which expects the same API surface as CharacterLoader) can reach the
// loader without an ES-module import.
window.VRMCharacterLoader = VRMCharacterLoader;
