Speech-to-Text Transcription
Automatic speech-to-text transcription for subtitles, captions, and content indexing
Automatically transcribe speech from your videos with advanced AI-powered speech recognition. Generate accurate transcripts with automatic language detection for subtitles, closed captions, accessibility, and content indexing.
Overview
VideoCascade provides high-quality speech-to-text transcription powered by state-of-the-art AI models. When enabled, the system:
- Extracts audio from video at optimal quality (16kHz mono MP3)
- Transcribes speech with automatic language detection
- Returns transcript with full text, detected language, and downloadable transcript URL
Perfect for generating subtitles, enabling search, improving accessibility, and indexing video content.
Enabling Transcription
Set enableTranscription: true in your video processing request:
curl -X POST https://api.videocascade.com/v1/videos \
-H "Authorization: Bearer vca_your_api_key" \
-H "Content-Type: application/json" \
-d '{
"fileUrl": "https://example.com/video.mp4",
"enableTranscription": true
}'
const response = await fetch('https://api.videocascade.com/v1/videos', {
method: 'POST',
headers: {
'Authorization': 'Bearer vca_your_api_key',
'Content-Type': 'application/json',
},
body: JSON.stringify({
fileUrl: 'https://example.com/video.mp4',
enableTranscription: true,
}),
});
const data = await response.json();
console.log(`Video ID: ${data.videoId}`);
import requests
response = requests.post(
'https://api.videocascade.com/v1/videos',
headers={
'Authorization': 'Bearer vca_your_api_key',
'Content-Type': 'application/json',
},
json={
'fileUrl': 'https://example.com/video.mp4',
'enableTranscription': True,
}
)
data = response.json()
print(f"Video ID: {data['videoId']}")
interface VideoRequest {
fileUrl: string;
enableTranscription?: boolean;
}
const request: VideoRequest = {
fileUrl: 'https://example.com/video.mp4',
enableTranscription: true,
};
const response = await fetch('https://api.videocascade.com/v1/videos', {
method: 'POST',
headers: {
'Authorization': 'Bearer vca_your_api_key',
'Content-Type': 'application/json',
},
body: JSON.stringify(request),
});
const data = await response.json();
console.log(`Video ID: ${data.videoId}`);
Response Structure
The transcript data is included in the video object under the transcript field:
/** Transcription state and results attached to a video. */
interface Transcript {
  /** Current state of the transcription job. */
  status:
    | 'pending'
    | 'processing'
    | 'completed'
    | 'failed';
  /** Full transcript text. */
  text?: string;
  /** Detected language code (e.g., 'en', 'es', 'fr'). */
  language?: string;
  /** S3 URL to download the full transcript. */
  transcriptUrl?: string;
}
interface VideoResponse {
videoId: string;
status: 'queued' | 'running' | 'succeeded' | 'failed';
progressPercent?: number;
hasTranscript: boolean; // true if transcription was requested
transcript?: Transcript;
// ... other fields
}
{
"videoId": "v_abc12345",
"status": "succeeded",
"progressPercent": 100,
"finalVideoUrl": "https://storage.example.com/videos/final.mp4",
"hasTranscript": true,
"transcript": {
"status": "completed",
"text": "Welcome to this tutorial on video processing. Today we'll cover how to automatically transcribe your videos using AI. First, let's discuss why transcription is important for accessibility and SEO...",
"language": "en",
"transcriptUrl": "https://storage.example.com/transcripts/v_abc12345.txt"
},
"enableTranscription": true,
"createdAt": "2025-11-23T10:30:00Z",
"lastUpdatedAt": "2025-11-23T10:32:15Z"
}
Transcript Status States
| Status | Description |
|---|---|
| pending | Transcription queued but not started yet |
| processing | Currently extracting audio and transcribing |
| completed | Transcription finished successfully, text available |
| failed | Transcription failed (check errorMessage in video response) |
Retrieving Transcripts
Option 1: Get Video Status
Retrieve transcript with video status:
const response = await fetch(
`https://api.videocascade.com/v1/videos/v_abc12345`,
{
headers: {
'Authorization': 'Bearer vca_your_api_key',
},
}
);
const video = await response.json();
if (video.hasTranscript && video.transcript.status === 'completed') {
console.log('Language:', video.transcript.language);
console.log('Transcript:', video.transcript.text);
console.log('Download URL:', video.transcript.transcriptUrl);
}
response = requests.get(
'https://api.videocascade.com/v1/videos/v_abc12345',
headers={'Authorization': 'Bearer vca_your_api_key'}
)
video = response.json()
if video.get('hasTranscript') and video['transcript']['status'] == 'completed':
print(f"Language: {video['transcript']['language']}")
print(f"Transcript: {video['transcript']['text']}")
print(f"Download URL: {video['transcript']['transcriptUrl']}")
Option 2: Dedicated Transcript Endpoint
Retrieve only the transcript data:
curl -X GET https://api.videocascade.com/v1/videos/v_abc12345/transcript \
-H "Authorization: Bearer vca_your_api_key"
const response = await fetch(
'https://api.videocascade.com/v1/videos/v_abc12345/transcript',
{
headers: {
'Authorization': 'Bearer vca_your_api_key',
},
}
);
const transcript = await response.json();
console.log(transcript);
// {
// "videoId": "v_abc12345",
// "text": "Welcome to this tutorial...",
// "language": "en",
// "transcriptUrl": "https://storage.example.com/transcripts/v_abc12345.txt",
// "createdAt": "2025-11-23T10:32:15Z"
// }
response = requests.get(
'https://api.videocascade.com/v1/videos/v_abc12345/transcript',
headers={'Authorization': 'Bearer vca_your_api_key'}
)
transcript = response.json()
print(transcript)
interface TranscriptResponse {
videoId: string;
text: string;
language: string;
transcriptUrl: string;
createdAt: string;
}
const response = await fetch(
'https://api.videocascade.com/v1/videos/v_abc12345/transcript',
{
headers: {
'Authorization': 'Bearer vca_your_api_key',
},
}
);
const transcript: TranscriptResponse = await response.json();
console.log(transcript.text);
Supported Languages
We automatically detect and transcribe 50+ languages:
| Language | Code | Language | Code |
|---|---|---|---|
| English | en | Spanish | es |
| French | fr | German | de |
| Italian | it | Portuguese | pt |
| Chinese | zh | Japanese | ja |
| Korean | ko | Arabic | ar |
| Hindi | hi | Turkish | tr |
| Polish | pl | Swedish | sv |
| Danish | da | Norwegian | no |
| Finnish | fi | Greek | el |
| and more |
Automatic Detection: You don't need to specify the language - we automatically detect it and return the language code in the response.
Use Cases
Generate Subtitles/Captions
Create SRT or VTT subtitle files from transcripts:
// Transcribe video
const response = await fetch('https://api.videocascade.com/v1/videos', {
method: 'POST',
headers: {
'Authorization': 'Bearer vca_your_api_key',
'Content-Type': 'application/json',
},
body: JSON.stringify({
fileUrl: 'https://example.com/video.mp4',
enableTranscription: true,
}),
});
const { videoId } = await response.json();
// Wait for completion
const video = await waitForCompletion(videoId);
// Convert to SRT format
/**
 * Builds an SRT subtitle document from plain transcript text.
 *
 * The transcript text carries no timing data, so every cue is given a fixed
 * 3-second window; the timestamps are approximations, not speech-aligned.
 *
 * @param {string} text - Transcript text; any run of whitespace separates words.
 * @param {number} [wordsPerSubtitle=10] - Words per cue; must be a positive integer.
 * @returns {string} SRT cues, each followed by a blank line; '' when there are no words.
 * @throws {RangeError} If wordsPerSubtitle is not a positive integer.
 */
function convertToSRT(text, wordsPerSubtitle = 10) {
  // A step of zero (or less) would never advance the loop below.
  if (!Number.isInteger(wordsPerSubtitle) || wordsPerSubtitle < 1) {
    throw new RangeError('wordsPerSubtitle must be a positive integer');
  }

  const secondsPerSubtitle = 3;
  // Split on any whitespace and drop empty entries: newlines or blank lines
  // left inside a cue would corrupt the SRT structure, because a blank line
  // terminates a cue.
  const words = (text ?? '').split(/\s+/).filter(Boolean);

  let srt = '';
  for (let i = 0, cue = 0; i < words.length; i += wordsPerSubtitle, cue++) {
    const startTime = formatTime(cue * secondsPerSubtitle);
    const endTime = formatTime((cue + 1) * secondsPerSubtitle);
    const subtitle = words.slice(i, i + wordsPerSubtitle).join(' ');
    // SRT cue numbers are 1-based.
    srt += `${cue + 1}\n${startTime} --> ${endTime}\n${subtitle}\n\n`;
  }
  return srt;
}
/**
 * Formats a duration in seconds as an SRT timestamp (`HH:MM:SS,mmm`).
 *
 * Works in whole milliseconds so fractional seconds produce a valid
 * millisecond field instead of leaking a decimal point into the seconds.
 *
 * @param {number} seconds - Offset from the start of the video, in seconds.
 *   Negative values are clamped to zero.
 * @returns {string} Zero-padded timestamp, e.g. `00:01:02,500`.
 */
function formatTime(seconds) {
  const totalMs = Math.max(0, Math.round(seconds * 1000));
  const pad = (value, width) => String(value).padStart(width, '0');

  const h = Math.floor(totalMs / 3600000);
  const m = Math.floor((totalMs % 3600000) / 60000);
  const s = Math.floor((totalMs % 60000) / 1000);
  const ms = totalMs % 1000;

  return `${pad(h, 2)}:${pad(m, 2)}:${pad(s, 2)},${pad(ms, 3)}`;
}
const srtContent = convertToSRT(video.transcript.text);
console.log(srtContent);
Video Content Search
Make video content searchable:
// Transcribe and index videos
/**
 * Submits a video for transcription and AI analysis, waits for processing to
 * finish, then indexes the transcript in the search engine.
 *
 * @param {string} videoUrl - URL of the video file to process.
 * @returns {Promise<void>} Resolves once the document has been indexed.
 * @throws {Error} If the API responds with a non-2xx status, or if processing
 *   ends without a completed transcript.
 */
async function indexVideoContent(videoUrl) {
  const response = await fetch('https://api.videocascade.com/v1/videos', {
    method: 'POST',
    headers: {
      Authorization: 'Bearer vca_your_api_key',
      'Content-Type': 'application/json',
    },
    body: JSON.stringify({
      fileUrl: videoUrl,
      enableTranscription: true,
      enableAiAnalysis: true, // Also get tags
    }),
  });
  // fetch() resolves on HTTP error statuses too; without this check a failed
  // request would carry on with an undefined videoId.
  if (!response.ok) {
    throw new Error(`API error: ${response.status}`);
  }
  const { videoId } = await response.json();
  const video = await waitForCompletion(videoId);

  // Only a completed transcript has text worth indexing.
  const transcript = video.transcript;
  if (transcript?.status !== 'completed') {
    throw new Error(
      video.errorMessage || `Transcript not available for video ${videoId}`
    );
  }

  // Index in search engine
  await searchEngine.index({
    id: videoId,
    transcript: transcript.text,
    language: transcript.language,
    // Analysis may be missing; index the transcript without tags in that case.
    tags: video.analysis?.data?.tags ?? [],
    // Full-text search
    searchableContent: transcript.text,
  });
}
// Users can now search video content by spoken words
const results = await searchEngine.search('tutorial video processing');
Accessibility Compliance
Provide transcripts for accessibility:
// Generate accessible video page
async function createAccessibleVideoPage(videoId) {
const video = await getVideo(videoId);
const html = `
<!DOCTYPE html>
<html lang="${video.transcript.language}">
<head>
<title>Video - ${video.videoName}</title>
</head>
<body>
<video controls src="${video.finalVideoUrl}">
<track kind="captions" src="${video.transcript.transcriptUrl}" />
</video>
<section>
<h2>Transcript</h2>
<p>${video.transcript.text}</p>
</section>
<section>
<h2>Download Options</h2>
<ul>
<li><a href="${video.finalVideoUrl}">Download Video</a></li>
<li><a href="${video.transcript.transcriptUrl}">Download Transcript</a></li>
</ul>
</section>
</body>
</html>
`;
return html;
}
Meeting Notes & Summaries
Auto-generate meeting notes from recordings:
// Transcribe meeting
const response = await fetch('https://api.videocascade.com/v1/videos', {
method: 'POST',
headers: {
Authorization: 'Bearer vca_your_api_key',
'Content-Type': 'application/json',
},
body: JSON.stringify({
fileUrl: 'https://example.com/meeting-recording.mp4',
enableTranscription: true,
removeNoise: true, // Clean audio
normalizeAudio: true, // Balance speaker volumes
}),
});
const { videoId } = await response.json();
const video = await waitForCompletion(videoId);
// Extract action items using AI
const actionItems = await extractActionItems(video.transcript.text);
// Send meeting notes
await sendEmail({
to: 'team@example.com',
subject: 'Meeting Notes - Nov 23',
body: `
Full Transcript:
${video.transcript.text}
Action Items:
${actionItems.map(item => `- ${item}`).join('\n')}
Recording: ${video.finalVideoUrl}
Transcript: ${video.transcript.transcriptUrl}
`,
});
SEO Optimization
Improve video SEO with transcripts:
// Add transcript to page for search engines
async function renderVideoPage(videoId) {
const video = await getVideo(videoId);
return `
<article>
<h1>${video.videoName}</h1>
<video controls src="${video.finalVideoUrl}"></video>
<!-- Schema.org structured data for search engines -->
<script type="application/ld+json">
{
"@context": "https://schema.org",
"@type": "VideoObject",
"name": "${video.videoName}",
"description": "${video.analysis?.data?.summary || ''}",
"contentUrl": "${video.finalVideoUrl}",
"transcript": "${video.transcript.text}",
"inLanguage": "${video.transcript.language}"
}
</script>
<!-- Visible transcript for users and crawlers -->
<section>
<h2>Transcript</h2>
<p>${video.transcript.text}</p>
</section>
</article>
`;
}
Combining with Other Features
Transcription works alongside other video processing features:
// Full video processing pipeline
const response = await fetch('https://api.videocascade.com/v1/videos', {
method: 'POST',
headers: {
'Authorization': 'Bearer vca_your_api_key',
'Content-Type': 'application/json',
},
body: JSON.stringify({
fileUrl: 'https://example.com/video.mp4',
// AI features
enableTranscription: true, // Speech-to-text
enableAiAnalysis: true, // Visual analysis
enableThumbnail: true, // Generate thumbnail
// Audio enhancement
normalizeAudio: true, // Consistent volume
removeNoise: true, // Clean background
removeSilence: true, // Remove pauses
// Video processing
aspectRatio: '16:9', // Format for YouTube
compressionQuality: 95, // High quality
// Webhook notification
webhookUrl: 'https://yourapp.com/webhooks/video-complete'
}),
});
# Full video processing pipeline
response = requests.post(
'https://api.videocascade.com/v1/videos',
headers={
'Authorization': 'Bearer vca_your_api_key',
'Content-Type': 'application/json',
},
json={
'fileUrl': 'https://example.com/video.mp4',
# AI features
'enableTranscription': True, # Speech-to-text
'enableAiAnalysis': True, # Visual analysis
'enableThumbnail': True, # Generate thumbnail
# Audio enhancement
'normalizeAudio': True, # Consistent volume
'removeNoise': True, # Clean background
'removeSilence': True, # Remove pauses
# Video processing
'aspectRatio': '16:9', # Format for YouTube
'compressionQuality': 95, # High quality
# Webhook notification
'webhookUrl': 'https://yourapp.com/webhooks/video-complete'
}
)
Best Practices
1. Improve Audio Quality First
Clean audio produces better transcripts:
// ✅ Good: Clean audio before transcription
{
fileUrl: 'https://example.com/video.mp4',
enableTranscription: true,
normalizeAudio: true, // Consistent volume
removeNoise: true, // Remove background noise
}
// ❌ Less effective: Transcribe raw audio
{
fileUrl: 'https://example.com/noisy-video.mp4',
enableTranscription: true,
}
2. Use Webhooks for Long Videos
Don't poll - use webhooks for efficient notification:
// ✅ Good: Use webhook
{
fileUrl: 'https://example.com/long-video.mp4',
enableTranscription: true,
webhookUrl: 'https://yourapp.com/webhooks/transcription-complete'
}
3. Store Transcript URL in Database
Save the transcript URL for easy retrieval:
// After transcription completes
await db.videos.update({
id: videoId,
transcriptText: video.transcript.text,
transcriptUrl: video.transcript.transcriptUrl,
language: video.transcript.language,
hasTranscript: true,
});
// Later retrieval
const video = await db.videos.findOne({ id: videoId });
console.log(video.transcriptUrl);
4. Handle Multiple Languages
Support multilingual content:
const video = await getVideo(videoId);
// Display transcript in user's language
const displayLanguage = {
en: 'English',
es: 'Spanish',
fr: 'French',
de: 'German',
// ... etc
}[video.transcript.language];
console.log(`Transcript available in ${displayLanguage}`);
5. Cache Transcripts
Avoid repeated API calls:
// Check cache first
const cached = await redis.get(`transcript:${videoId}`);
if (cached) {
return JSON.parse(cached);
}
// Fetch and cache
const response = await fetch(
`https://api.videocascade.com/v1/videos/${videoId}/transcript`,
{
headers: { Authorization: 'Bearer vca_your_api_key' },
}
);
const transcript = await response.json();
await redis.set(
`transcript:${videoId}`,
JSON.stringify(transcript),
'EX',
86400 * 7 // Cache 7 days
);
Limitations & Constraints
Duration Limits
- Maximum duration: 10 minutes
- Recommended: Under 5 minutes for best results
- Long videos: Consider splitting into segments
10 Minute Limit: Videos longer than 10 minutes will have transcription disabled automatically. Split long videos into segments for transcription.
Language Support
Best accuracy:
- English, Spanish, French, German, Italian
- Mandarin Chinese, Japanese, Korean
- Portuguese, Dutch, Russian
Moderate accuracy:
- Arabic, Hindi, Turkish, Polish
- Most European languages
Limited accuracy:
- Rare languages or dialects
- Heavy accents or non-native speakers
- Technical jargon or domain-specific terms
Audio Quality Requirements
Required:
- Clear speech (not mumbled or whispered)
- Minimal background noise
- Single speaker preferred (multi-speaker works but may be less accurate)
Not suitable for:
- Music transcription
- Multiple overlapping speakers
- Very noisy environments (construction, traffic)
- Very quiet or distant speech
Error Handling
Handle common error scenarios:
/**
 * Requests transcription for a video and reports the outcome as a result
 * object rather than throwing.
 *
 * @param {string} videoUrl - URL of the video file to transcribe.
 * @returns {Promise<object>} On success:
 *   `{ success: true, text, language, url }`. On any failure (HTTP error,
 *   no transcript, failed transcription, thrown exception):
 *   `{ success: false, error, text: '' }`.
 */
async function transcribeVideoSafely(videoUrl) {
  // Every unsuccessful outcome shares this shape.
  const failure = (message) => ({
    success: false,
    error: message,
    text: '',
  });

  try {
    const payload = {
      fileUrl: videoUrl,
      enableTranscription: true,
      normalizeAudio: true, // Improve audio quality
    };
    const response = await fetch('https://api.videocascade.com/v1/videos', {
      method: 'POST',
      headers: {
        'Authorization': 'Bearer vca_your_api_key',
        'Content-Type': 'application/json',
      },
      body: JSON.stringify(payload),
    });
    if (!response.ok) {
      // Routed to the catch block below, like any other exception.
      throw new Error(`API error: ${response.status}`);
    }

    const created = await response.json();
    // Wait for transcription
    const outcome = await waitForCompletion(created.videoId);

    if (!outcome.hasTranscript) {
      return failure('Transcription not available');
    }

    const { transcript } = outcome;
    if (transcript.status === 'failed') {
      return failure(outcome.errorMessage || 'Transcription failed');
    }

    return {
      success: true,
      text: transcript.text,
      language: transcript.language,
      url: transcript.transcriptUrl,
    };
  } catch (error) {
    console.error('Error transcribing video:', error);
    return failure(error.message);
  }
}
// Usage
const result = await transcribeVideoSafely(videoUrl);
if (result.success) {
console.log('Transcript:', result.text);
} else {
console.error('Failed:', result.error);
}
def transcribe_video_safely(video_url):
"""Transcribe video with error handling"""
try:
response = requests.post(
'https://api.videocascade.com/v1/videos',
headers={
'Authorization': 'Bearer vca_your_api_key',
'Content-Type': 'application/json',
},
json={
'fileUrl': video_url,
'enableTranscription': True,
'normalizeAudio': True, # Improve audio quality
}
)
response.raise_for_status()
data = response.json()
video_id = data['videoId']
# Wait for transcription
result = wait_for_completion(video_id)
if not result.get('hasTranscript'):
return {
'success': False,
'error': 'Transcription not available',
'text': ''
}
if result['transcript']['status'] == 'failed':
return {
'success': False,
'error': result.get('errorMessage', 'Transcription failed'),
'text': ''
}
return {
'success': True,
'text': result['transcript']['text'],
'language': result['transcript']['language'],
'url': result['transcript']['transcriptUrl']
}
except Exception as error:
print(f"Error transcribing video: {error}")
return {
'success': False,
'error': str(error),
'text': ''
}
# Usage
result = transcribe_video_safely(video_url)
if result['success']:
print(f"Transcript: {result['text']}")
else:
print(f"Failed: {result['error']}")
Common Error Messages
| Error | Cause | Solution |
|---|---|---|
| "Audio file too large" | Extracted audio > 25MB | Use shorter video or lower quality source |
| "Failed to extract audio" | Video has no audio track | Ensure video contains audio |
| "Transcription timed out" | OpenAI API timeout (60s) | Retry or use shorter video |
| "OpenAI API key not configured" | Missing API key | Configure OPENAI_API_KEY environment variable |
| "Video does not have a transcript" | Transcription not enabled | Set enableTranscription: true |