2025年10月17日金曜日

GPU文字起こしWhisperX(超下書き記事)

 

以下のコマンドを、上から順番に1行ずつ実行してください(すべて PowerShell で実行)。

  1. Python 3.11 を Chocolatey で入れる
    choco install -y python311

  2. 新しい Python で venv を作り直す(既存があれば消してOK)
    & "C:\Python311\python.exe" -m venv C:\asr_venv

  3. pip を更新
    & "C:\asr_venv\Scripts\python.exe" -m pip install --upgrade pip setuptools wheel

  4. CUDA 12.6 用の PyTorch を入れる(3.11 対応)
    & "C:\asr_venv\Scripts\python.exe" -m pip install --index-url https://download.pytorch.org/whl/cu126 torch==2.7.1+cu126 torchvision==0.22.1+cu126 torchaudio==2.7.1+cu126

  5. WhisperX と補助ライブラリ
    & "C:\asr_venv\Scripts\python.exe" -m pip install whisperx==3.7.2 webrtcvad-wheels==2.0.14 pydub librosa==0.10.2.post1 soundfile scikit-learn speechbrain

  6. 動作確認
    & "C:\asr_venv\Scripts\python.exe" -c "import ssl,torch; print('ssl OK'); print('torch',torch.__version__,'CUDA',torch.version.cuda)"

以降、スクリプト中の Python パスは C:\asr_venv\Scripts\python.exe に合わせてください(transcribe_noalign.ps1 や dragdrop_diarize.ps1 の $Python 変数など)。

 

文字起こしスクリプト 
C:\asr_tools\transcribe_noalign.ps1

param(
  [Parameter(ValueFromRemainingArguments = $true)]
  [string[]]$InputPaths
)
# Script entry: every remaining command-line argument (dropped files/folders) lands in $InputPaths.

# $true keeps the WhisperX JSON output; $false deletes it once the TXT has been written.
$KeepJson = $true
$ErrorActionPreference = "Stop"

# --- logging ---
# One transcript log per run, stored next to this script and named after the start time.
$logStamp = Get-Date -Format "yyyyMMdd_HHmmss"
$logPath  = Join-Path $PSScriptRoot ("run_" + $logStamp + ".log")
try {
  Start-Transcript -Path $logPath -Append | Out-Null
} catch {
  # Best effort: the run continues without a transcript if it cannot be started.
}

# --- settings ---
$Python = "C:\asr_venv\Scripts\python.exe"

# WhisperX arguments (no alignment) -> only JSON is produced
$WhisperArgsCommon  = @("-m", "whisperx")
$WhisperArgsCommon += @("--model", "large-v3")
$WhisperArgsCommon += @("--language", "ja")
$WhisperArgsCommon += @("--compute_type", "float16")
$WhisperArgsCommon += @("--vad_method", "silero")
$WhisperArgsCommon += "--no_align"
$WhisperArgsCommon += @("--output_format", "json")

# Metadata reading
# Works around the UTC -> JST offset of iPhone/QuickTime timestamps.
$UseQuickTimeUTC    = $true
# $true also scans sub-folders of a dropped folder.
$RecurseFolders     = $false
# Extra manual correction in hours (0 when not needed).
$ManualTZShiftHours = 0

# Timestamp style used in the generated TXT: "start" or "range"
$TimestampStyle     = "start"

# --- exiftool auto-detect ---
# Locate exiftool: first whatever Get-Command resolves, then the usual Chocolatey
# install locations. The resolved path is stored in $ExifTool; the script exits
# with code 1 when nothing is found.
$ExifTool = $null
$cmd = $null
try { $cmd = Get-Command exiftool -ErrorAction SilentlyContinue } catch {}
if ($cmd -and $cmd.Source) {
  $ExifTool = $cmd.Source
} else {
  foreach ($c in @(
    "C:\ProgramData\chocolatey\bin\exiftool.exe",
    "C:\ProgramData\chocolatey\lib\exiftool\tools\exiftool.exe"
  )) { if (Test-Path $c) { $ExifTool = $c; break } }
}
if (-not $ExifTool) {
  # -ErrorAction Continue is required here: with $ErrorActionPreference = "Stop" a plain
  # Write-Error throws immediately, so the transcript would never be stopped and the
  # explicit "exit 1" below would never be reached.
  Write-Error "exiftool not found. Install: choco install exiftool -y" -ErrorAction Continue
  try { Stop-Transcript | Out-Null } catch {}
  exit 1
}

function Get-MediaDateTime([string]$path) {
  # Returns the date/time recorded in a media file's metadata as [datetime],
  # or $null when none of the candidate tags yields a parseable value.
  #
  # $path : file to inspect.
  # Reads the script-level $ExifTool (command to invoke) and $UseQuickTimeUTC
  # (adds "-api QuickTimeUTC=1" to the exiftool call).
  # Tags are tried in the order listed in $tags; the first parseable value wins.

  # Function-local override. The stderr redirection below (2>$null) combined with a
  # "Stop" preference can turn any exiftool warning into a terminating error on
  # Windows PowerShell / PowerShell before 7.2, which would abort the whole file.
  $ErrorActionPreference = "Continue"

  # exiftool is told to print dates in exactly this layout (-d), so the value is parsed
  # with the matching .NET pattern and the invariant culture. [datetime]::Parse would
  # depend on the current culture (e.g. a non-Gregorian default calendar shifts the year).
  $exifDateFormat = "%Y-%m-%d %H:%M:%S"
  $netDateFormat  = "yyyy-MM-dd HH:mm:ss"
  $invariant      = [System.Globalization.CultureInfo]::InvariantCulture
  $noStyles       = [System.Globalization.DateTimeStyles]::None

  # Arguments shared by every tag query (built once instead of once per tag).
  $commonArgs = @()
  if ($UseQuickTimeUTC) { $commonArgs += "-api"; $commonArgs += "QuickTimeUTC=1" }
  $commonArgs += "-s"; $commonArgs += "-s"; $commonArgs += "-s"   # -s three times: print the value only
  $commonArgs += "-d"; $commonArgs += $exifDateFormat

  $tags = @("MediaCreateDate","CreateDate","TrackCreateDate","ModifyDate","FileCreateDate")
  foreach ($t in $tags) {
    $cmdArgs = $commonArgs + @("-$t", "--", $path)
    $out = & $ExifTool @cmdArgs 2>$null
    if ($LASTEXITCODE -ne 0 -or -not $out) { continue }

    # First non-blank output line carries the value.
    $line = ($out | Where-Object { $_ -and ($_.Trim().Length -gt 0) } | Select-Object -First 1)
    if (-not $line) { continue }

    $parsed = [datetime]::MinValue
    if ([datetime]::TryParseExact(("" + $line).Trim(), $netDateFormat, $invariant, $noStyles, [ref]$parsed)) {
      return $parsed
    }
    # Unparseable value (e.g. an all-zero date): fall through to the next tag.
  }
  return $null
}

function Format-JpName([datetime]$dt) {
  # Builds the Japanese-style base name "<Y>年<M>月<D>日<H>時<mm>分" from a date/time.
  # Only the minute is zero-padded to two digits; seconds are not part of the name.
  $minuteText = $dt.Minute.ToString("D2")
  $fields = @($dt.Year, $dt.Month, $dt.Day, $dt.Hour, $minuteText)
  return ("{0}年{1}月{2}日{3}時{4}分" -f $fields)
}

function Get-TargetFiles([string[]]$paths) {
  # Expands the given paths into a list of full .m4a file paths.
  #
  # $paths : files and/or folders. A file is accepted only when its extension is .m4a;
  #          a folder contributes the .m4a files directly inside it (and below it when
  #          the script-level $RecurseFolders is true).
  # Paths that cannot be resolved produce a warning and are skipped.
  # Each file appears at most once, even when it is reachable through several inputs
  # (e.g. a file dropped together with its parent folder).
  $files = New-Object System.Collections.Generic.List[string]
  $seen  = New-Object 'System.Collections.Generic.HashSet[string]' -ArgumentList ([System.StringComparer]::OrdinalIgnoreCase)

  foreach ($p in $paths) {
    if (-not $p) { continue }

    # -ErrorAction Stop makes the catch effective regardless of the caller's preference.
    try { $full = (Resolve-Path -LiteralPath $p -ErrorAction Stop).Path }
    catch { Write-Warning "Path not found: $p"; continue }

    $candidates = @()
    if (Test-Path -LiteralPath $full -PathType Leaf) {
      $candidates = @($full)
    } elseif (Test-Path -LiteralPath $full -PathType Container) {
      $opt = @{ LiteralPath = $full; File = $true; Filter = "*.m4a"; Recurse = [bool]$RecurseFolders }
      $candidates = @(Get-ChildItem @opt | ForEach-Object { $_.FullName })
    }

    foreach ($c in $candidates) {
      # Exact extension check: the provider-level -Filter may also return names whose
      # extension merely starts with ".m4a".
      if ([System.IO.Path]::GetExtension($c) -ine ".m4a") { continue }
      if ($seen.Add($c)) { $files.Add($c) }
    }
  }
  return $files
}

# --- HH:MM:SS formatter (seconds -> string, no milliseconds) ---
function Format-HHMMSS([double]$sec) {
  # Negative input is clamped to 0; the value is rounded to whole seconds.
  # Hours are not wrapped at 24: days are folded into the hour field.
  $wholeSeconds = [math]::Round([math]::Max($sec, [double]0))
  $span = [TimeSpan]::FromSeconds($wholeSeconds)
  $totalHours = ($span.Days * 24) + $span.Hours
  return "{0:D2}:{1:D2}:{2:D2}" -f $totalHours, $span.Minutes, $span.Seconds
}

# --- JSON -> timestamped.txt (works without alignment) ---
function Convert-JsonToTimestampedTxt([string]$jsonPath, [string]$txtOutPath, [string]$timeStyle = "start") {
  # Writes one line per non-empty segment of the JSON file to $txtOutPath (UTF-8).
  #
  # $jsonPath   : JSON file with a top-level "segments" collection whose items are read
  #               through their text / start / end properties.
  # $txtOutPath : text file to write.
  # $timeStyle  : "range" emits "[start-end] text"; any other value emits "[start] text".
  # Returns $true when a file was written; $false when the JSON is missing, cannot be
  # parsed, has no segments, or every segment text is empty.
  if (-not (Test-Path -LiteralPath $jsonPath)) { return $false }

  $doc = $null
  try {
    $doc = Get-Content -LiteralPath $jsonPath -Raw -Encoding UTF8 | ConvertFrom-Json -ErrorAction Stop
  } catch {
    Write-Warning "JSON parse failed: $jsonPath"
    return $false
  }

  $segments = $doc.segments
  if (-not $segments) { return $false }

  $asRange = ($timeStyle -eq "range")
  $lines = New-Object System.Collections.Generic.List[string]
  foreach ($item in $segments) {
    $body = ("" + $item.text).Trim()
    if ($body -eq "") { continue }

    $from = Format-HHMMSS([double]$item.start)
    $to   = Format-HHMMSS([double]$item.end)
    if ($asRange) {
      $lines.Add(("[{0}-{1}] {2}" -f $from,$to,$body))
    } else {
      $lines.Add(("[{0}] {1}"     -f $from,$body))
    }
  }

  if ($lines.Count -le 0) { return $false }
  $lines | Set-Content -LiteralPath $txtOutPath -Encoding UTF8
  return $true
}

# ---- input checks ----
# Stop early (exit code 1) when nothing was passed in, or when the inputs contain no .m4a.
$hasInput = $InputPaths -and ($InputPaths.Count -ne 0)
if (-not $hasInput) {
  Write-Host "Drag & drop .m4a files or folders onto this script."
  try { Stop-Transcript | Out-Null } catch {}
  exit 1
}

$files = Get-TargetFiles $InputPaths
if ($files.Count -lt 1) {
  Write-Host "No .m4a files found."
  try { Stop-Transcript | Out-Null } catch {}
  exit 1
}

# ---- main loop ----
# For every input file:
#   1) rename it after its media date/time,
#   2) run WhisperX (no alignment) so that a JSON lands in "<folder>\_transcripts",
#   3) convert that JSON into "<name>.timestamped.txt",
#   4) optionally delete the JSON ($KeepJson = $false).
# A failure on one file is reported and counted; the remaining files are still processed.
$failedCount = 0
foreach ($f in $files) {
  try {
    $dir  = [System.IO.Path]::GetDirectoryName($f)
    $base = [System.IO.Path]::GetFileName($f)

    # 1) rename by media datetime
    $dt = Get-MediaDateTime $f
    if ($null -eq $dt) {
      Write-Warning "[$base] no media datetime; fallback to file CreationTime"
      $dt = (Get-Item -LiteralPath $f).CreationTime
    }
    if ($ManualTZShiftHours -ne 0) { $dt = $dt.AddHours($ManualTZShiftHours) }
    $jp = Format-JpName($dt)
    $newName = "$jp.m4a"
    $newPath = [System.IO.Path]::Combine($dir, $newName)

    # Probe "<name>.m4a", "<name>(1).m4a", ... and stop at the first free name OR at the
    # file itself. Without the self-check a file already called "<name>(1).m4a" would be
    # treated as a collision with itself and renamed to "(2)", "(3)", ... on every rerun.
    $i = 1
    while (($newPath -ne $f) -and (Test-Path -LiteralPath $newPath)) {
      $newName = "{0}({1}).m4a" -f $jp,$i
      $newPath = [System.IO.Path]::Combine($dir, $newName)
      $i++
    }
    if ($newPath -ne $f) {
      Rename-Item -LiteralPath $f -NewName $newName
      $f = $newPath
      $base = $newName
      Write-Host "Renamed: $base"
    } else {
      Write-Host "Already named: $base"
    }

    # 2) transcribe (no alignment) -> JSON only
    $outDir = [System.IO.Path]::Combine($dir, "_transcripts")
    if (-not (Test-Path -LiteralPath $outDir)) { New-Item -ItemType Directory -Path $outDir | Out-Null }

    # Not named $args: that is an automatic variable and should not be overwritten.
    $whisperArgs = @()
    $whisperArgs += $WhisperArgsCommon
    $whisperArgs += "--output_dir"; $whisperArgs += $outDir
    $whisperArgs += $f

    & $Python @whisperArgs
    if ($LASTEXITCODE -ne 0) {
      Write-Warning "WhisperX failed: $base (exit $LASTEXITCODE)"
      $failedCount++
      continue
    }

    Write-Host "Done: $base -> $outDir"

    # 3) JSON -> timestamped.txt
    # The JSON name is looked up in three forms to tolerate naming differences:
    # "<file>.m4a.json", "<file>.json", then the newest "<file>*.json" as a last resort.
    $stem   = [System.IO.Path]::GetFileNameWithoutExtension($base)
    $json1  = Join-Path $outDir ($base + ".json")
    $json2  = Join-Path $outDir ($stem + ".json")
    $json3  = $null
    if (-not (Test-Path -LiteralPath $json1) -and -not (Test-Path -LiteralPath $json2)) {
      $json3 = Get-ChildItem -LiteralPath $outDir -Filter ("{0}*.json" -f $stem) -File -ErrorAction SilentlyContinue |
               Sort-Object LastWriteTime -Descending | Select-Object -First 1
      if ($json3) { $json3 = $json3.FullName }
    }
    $jsonPath = $null
    foreach ($c in @($json1,$json2,$json3)) { if ($c -and (Test-Path -LiteralPath $c)) { $jsonPath = $c; break } }

    $tsTxt = Join-Path $outDir ($base + ".timestamped.txt")
    $ok = $false
    if ($jsonPath) {
      $ok = Convert-JsonToTimestampedTxt -jsonPath $jsonPath -txtOutPath $tsTxt -timeStyle $TimestampStyle
    } else {
      Write-Warning "JSON not found for: $base"
    }

    if ($ok) {
      Write-Host "Timestamped TXT created: $tsTxt"

      # 4) minimal cleanup: delete only the JSON (and only when $KeepJson is $false)
      if (-not $KeepJson) {
        try {
          $jsonToDelete = @()
          if (Test-Path -LiteralPath $json1) { $jsonToDelete += $json1 }
          if (Test-Path -LiteralPath $json2) { $jsonToDelete += $json2 }
          if ($json3 -and (Test-Path -LiteralPath $json3)) { $jsonToDelete += $json3 }
          $jsonToDelete = $jsonToDelete | Select-Object -Unique
          foreach ($p in $jsonToDelete) {
            if ($p -ne $tsTxt) { Remove-Item -LiteralPath $p -Force -ErrorAction SilentlyContinue }
          }
        } catch {
          Write-Warning "cleanup failed: $($_.Exception.Message)"
        }
      }
    } else {
      Write-Warning "Could not create timestamped TXT (no JSON or parse failed)."
      $failedCount++
    }

  } catch {
    Write-Warning "Error: $f"
    Write-Warning $_.Exception.Message
    $failedCount++
  }
}

try { Stop-Transcript | Out-Null } catch {}

# Report the real outcome: 1 when at least one file failed, otherwise 0, so that the
# calling CMD wrapper shows a meaningful exit code.
$exitCode = 0
if ($failedCount -gt 0) { $exitCode = 1 }
Write-Host ("ExitCode: {0}" -f $exitCode)
exit $exitCode

ドラッグアンドドロップ対応バッチファイル
C:\asr_tools\ドラッグして文字起こし.cmd

@echo off
setlocal

REM Launcher: runs transcribe_noalign.ps1 located in the same folder as this CMD.
set "SCRIPT=%~dp0transcribe_noalign.ps1"

if exist "%SCRIPT%" goto run

REM The PS1 is missing: explain, wait for a key press, and stop with exit code 1.
echo Missing: "%SCRIPT%"
echo C:\asr_tools に transcribe_noalign.ps1 があるか確認してください。
pause
exit /b 1

:run
REM Every drag-and-dropped argument is forwarded unchanged to the PS1.
powershell -NoProfile -ExecutionPolicy Bypass -File "%SCRIPT%" %*

REM Show the exit code of the PowerShell run and keep the window open.
set "EC=%ERRORLEVEL%"
echo(
echo Done. ExitCode=%EC%
pause
 

0 件のコメント:

コメントを投稿