Developer Guide

Speech to Text API with Australian Data Residency: Python Guide

A practical walkthrough for Python developers. Submit audio, poll for results, extract speaker labels, and handle errors, all on Australian infrastructure.

Why data residency matters for Australian developers

Most transcription APIs (AssemblyAI, Deepgram, OpenAI Whisper) process audio on US infrastructure. For Australian developers building products for Australian clients, that creates a problem: sending audio containing personal information offshore triggers APP 8 obligations under the Privacy Act 1988 (Cth). APP 8 requires you to take reasonable steps to ensure overseas recipients handle the data in line with the Australian Privacy Principles, which is an obligation most teams aren't set up to manage.

Australian Transcription processes all audio exclusively on AWS infrastructure in Sydney. Your data never leaves Australia, so APP 8 obligations are never triggered. The API pattern is similar to other async transcription services, so the integration is straightforward if you've used any of them before.

Prerequisites

  • Python 3.8 or later
  • requests library (pip install requests)
  • An Australian Transcription API key (sign up free, no credit card required)

Basic example: submit a file and get the transcript

The API is async: you submit a file and receive a job ID, then poll for completion. Here's a minimal working example:

transcribe_basic.py
import time
import requests

# Placeholder: replace with your own key before running.
API_KEY = "your_api_key_here"
# Root of the v1 API; endpoint paths such as /transcribe are appended to it.
BASE_URL = "https://api.icana.ai/api/v1"
# Passed as headers= on every request; the key travels in X-API-Key.
HEADERS = {"X-API-Key": API_KEY}


def transcribe(audio_path: str) -> str:
    """Submit an audio file and return the completed transcript.

    Uploads the file to ``/transcribe``, then polls ``/jobs/{job_id}``
    every 5 seconds, up to 60 times, printing the status on each poll.

    Args:
        audio_path: Path to the audio file to upload.

    Returns:
        The ``transcription`` field of the completed job.

    Raises:
        requests.HTTPError: If any request returns a 4xx/5xx status.
        requests.Timeout: If a single request stalls past its timeout.
        RuntimeError: If the job reports the status ``failed``.
        TimeoutError: If the job is still not complete after 60 polls.
    """

    # Step 1: Submit the file
    with open(audio_path, "rb") as f:
        response = requests.post(
            f"{BASE_URL}/transcribe",
            headers=HEADERS,
            files={"file": f},
            data={"language": "en", "num_speakers": 2},
            # requests waits forever by default; fail on a dead connection.
            timeout=60,
        )
    response.raise_for_status()
    job_id = response.json()["job_id"]
    print(f"Job submitted: {job_id}")

    # Step 2: Poll until complete (max 60 attempts, ~5 min)
    for _ in range(60):
        result = requests.get(
            f"{BASE_URL}/jobs/{job_id}",
            headers=HEADERS,
            # Without a timeout, one hung poll would defeat the ~5 min cap.
            timeout=30,
        )
        result.raise_for_status()
        data = result.json()

        status = data["status"]
        print(f"Status: {status}")

        if status == "complete":
            return data["transcription"]
        elif status == "failed":
            raise RuntimeError(f"Transcription failed: {data.get('error')}")

        time.sleep(5)

    raise TimeoutError(f"Job {job_id} did not complete within 5 minutes")


if __name__ == "__main__":
    # Run the submit-and-poll flow on a sample file, then show the text
    # under a heading separated from the status output by a blank line.
    text = transcribe("recording.mp3")
    print("\nTranscript:", text, sep="\n")

Using the prompt parameter for domain-specific vocabulary

The prompt parameter accepts a comma-separated list of terms you want the model to recognise correctly. This is particularly useful for medical or legal terminology, product names, or proper nouns that the base model might transcribe incorrectly.

transcribe_with_vocab.py
import requests

# Placeholder: replace with your own key before running.
API_KEY = "your_api_key_here"
# Root of the v1 API; the /transcribe path is appended to it below.
BASE_URL = "https://api.icana.ai/api/v1"
# Passed as headers= on the request; the key travels in X-API-Key.
HEADERS = {"X-API-Key": API_KEY}

# Medical vocabulary example
# The adjacent string literals are joined into one comma-separated string,
# which is the form the "prompt" field is sent in.
MEDICAL_TERMS = (
    "metformin, HbA1c, hypertension, dyslipidaemia, "
    "myocardial infarction, electrocardiogram, spirometry"
)

# Legal vocabulary example
# Same comma-separated format as the medical list above.
LEGAL_TERMS = (
    "indemnification, tortfeasor, liquidated damages, "
    "Anton Piller, Mareva injunction, subrogation"
)


def transcribe_with_vocab(audio_path: str, vocab: str) -> str:
    """Submit an audio file with vocabulary hints and return the job ID.

    This only submits the job; it does not wait for the transcript.

    Args:
        audio_path: Path to the audio file to upload.
        vocab: Comma-separated terms, sent as the ``prompt`` form field.

    Returns:
        The ``job_id`` field of the API response.

    Raises:
        requests.HTTPError: If the API returns a 4xx/5xx status.
        requests.Timeout: If the request stalls past its timeout.
    """
    with open(audio_path, "rb") as f:
        response = requests.post(
            f"{BASE_URL}/transcribe",
            headers=HEADERS,
            files={"file": f},
            data={
                "language": "en",
                "num_speakers": 2,
                "prompt": vocab,
            },
            # requests waits forever by default; fail on a dead connection.
            timeout=60,
        )
    response.raise_for_status()
    return response.json()["job_id"]


# Submit a medical consultation recording, passing the medical term list as
# vocabulary hints. Only the job ID comes back from this call; the
# transcript itself is not returned here.
job_id = transcribe_with_vocab("consultation.mp3", MEDICAL_TERMS)
print(f"Job ID: {job_id}")

Extracting speaker diarization output

When the job completes, the response includes a diarization field that contains the transcript broken into speaker-labelled segments, each with start and end timestamps.

extract_speakers.py
import time
import requests

# Placeholder: replace with your own key before running.
API_KEY = "your_api_key_here"
# Root of the v1 API; the /jobs/{job_id} path is appended to it below.
BASE_URL = "https://api.icana.ai/api/v1"
# Passed as headers= on every poll; the key travels in X-API-Key.
HEADERS = {"X-API-Key": API_KEY}


def get_job_result(job_id: str) -> dict:
    """Poll until complete and return the full result (max 60 attempts, ~5 min).

    Args:
        job_id: ID of a previously submitted transcription job.

    Returns:
        The decoded JSON body of the first poll whose status is ``complete``.

    Raises:
        requests.HTTPError: If a poll returns a 4xx/5xx status.
        requests.Timeout: If a single poll stalls past its timeout.
        RuntimeError: If the job reports the status ``failed``.
        TimeoutError: If the job is still not complete after 60 polls.
    """
    for _ in range(60):
        response = requests.get(
            f"{BASE_URL}/jobs/{job_id}",
            headers=HEADERS,
            # Without a timeout, one hung poll would defeat the ~5 min cap.
            timeout=30,
        )
        response.raise_for_status()
        data = response.json()

        if data["status"] == "complete":
            return data
        elif data["status"] == "failed":
            raise RuntimeError(f"Job failed: {data.get('error')}")

        time.sleep(5)

    raise TimeoutError(f"Job {job_id} did not complete within 5 minutes")


def print_diarized_transcript(job_id: str):
    """Print a finished job's transcript, one line per speaker segment.

    Waits for the job via ``get_job_result``. When the result has no
    ``diarization`` segments, the plain ``transcription`` text is printed
    instead.
    """
    job = get_job_result(job_id)
    speaker_turns = job.get("diarization", [])

    if speaker_turns:
        for turn in speaker_turns:
            label = turn.get("speaker", "Unknown")
            began = turn.get("start", 0)
            ended = turn.get("end", 0)
            words = turn.get("text", "")

            # Offsets are rendered as minutes:seconds, e.g.
            # [Speaker 1 | 0:00 - 0:15] Hello, how are you feeling today?
            began_min, began_sec = divmod(began, 60)
            ended_min, ended_sec = divmod(ended, 60)
            began_label = f"{int(began_min)}:{int(began_sec):02d}"
            ended_label = f"{int(ended_min)}:{int(ended_sec):02d}"
            print(f"[{label} | {began_label} - {ended_label}] {words}")
        return

    # No segments available: show the undivided transcript text.
    print(job.get("transcription", ""))


# Example usage (after submitting a job)
# job_id = "job_abc123"
# print_diarized_transcript(job_id)

Error handling

Two error codes worth handling explicitly:

  • 429 Too Many Requests: you've exceeded the rate limit. Back off and retry. The response includes a Retry-After header.
  • 402 Payment Required: your free credit has been exhausted. Top up your account to continue.

transcribe_with_errors.py
import time
import requests

# Placeholder: replace with your own key before running.
API_KEY = "your_api_key_here"
# Root of the v1 API; the /transcribe path is appended to it below.
BASE_URL = "https://api.icana.ai/api/v1"
# Passed as headers= on every attempt; the key travels in X-API-Key.
HEADERS = {"X-API-Key": API_KEY}


def submit_transcription(audio_path: str, max_retries: int = 3) -> str:
    """Submit a transcription job with retry logic for rate limiting.

    Only a 429 response is retried; every other error is raised at once.

    Args:
        audio_path: Path to the audio file to upload.
        max_retries: Maximum number of submission attempts.

    Returns:
        The ``job_id`` field of the API response.

    Raises:
        RuntimeError: On a 402 response, or when all attempts are used up
            without a successful submission.
        ValueError: On a 401 response.
        requests.HTTPError: On any other 4xx/5xx response.
        requests.Timeout: If a request stalls past its timeout.
    """
    for attempt in range(max_retries):
        with open(audio_path, "rb") as f:
            response = requests.post(
                f"{BASE_URL}/transcribe",
                headers=HEADERS,
                files={"file": f},
                data={"language": "en"},
                # requests waits forever by default; fail on a dead connection.
                timeout=60,
            )

        # Any 2xx is a success. Matching 200 alone would let e.g. 201/202
        # fall through and upload the same file again on the next attempt.
        if 200 <= response.status_code < 300:
            return response.json()["job_id"]

        elif response.status_code == 429:
            if attempt == max_retries - 1:
                # No attempt left, so waiting would only delay the error.
                break

            # Rate limited — respect Retry-After header if present
            try:
                retry_after = max(0, int(response.headers.get("Retry-After", 10)))
            except ValueError:
                # Retry-After may be an HTTP-date instead of a number of
                # seconds; use the default delay rather than crash.
                retry_after = 10
            print(f"Rate limited. Retrying in {retry_after}s...")
            time.sleep(retry_after)

        elif response.status_code == 402:
            raise RuntimeError(
                "Insufficient credit. Top up your account at "
                "https://australiantranscription.com.au/billing"
            )

        elif response.status_code == 401:
            raise ValueError("Invalid API key. Check your X-API-Key header.")

        else:
            response.raise_for_status()

    raise RuntimeError(f"Failed after {max_retries} attempts.")

Checking your usage

You can query your current usage summary at any time:

check_usage.py
import requests

# Placeholder: replace with your own key before running.
API_KEY = "your_api_key_here"
# Root of the v1 API; the /usage path is appended to it below.
BASE_URL = "https://api.icana.ai/api/v1"
# Passed as headers= on the request; the key travels in X-API-Key.
HEADERS = {"X-API-Key": API_KEY}


def get_usage():
    """Fetch the account usage summary from the ``/usage`` endpoint.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: If the API returns a 4xx/5xx status.
        requests.Timeout: If the request stalls past its timeout.
    """
    response = requests.get(
        f"{BASE_URL}/usage",
        headers=HEADERS,
        # requests waits forever by default; fail on a dead connection.
        timeout=30,
    )
    response.raise_for_status()
    return response.json()


# Fetch the usage summary and print the decoded JSON response as-is.
usage = get_usage()
print(usage)

API reference summary

Endpoint Method Description
/api/v1/transcribe POST Submit audio for transcription. Returns job_id.
/api/v1/jobs/{job_id} GET Poll job status. Returns status, transcription, and diarization when complete.
/api/v1/usage GET Get account usage summary.

POST /api/v1/transcribe fields

  • file: audio file (multipart/form-data, required)
  • language: ISO 639-1 language code, default "en"
  • prompt: comma-separated vocabulary hints (optional)
  • num_speakers: number of speakers, 1 to 10, default 2

Full API documentation is available at /docs.

Get your free API key

Sign up and get 60 minutes of free transcription. No credit card required. Australian data residency included.