Transcribe Audio Input with Firebase Firestore and Storage through OpenAI

Hello World!

import { onDocumentCreated } from "firebase-functions/v2/firestore";
import admin from "firebase-admin";
import { logger } from "firebase-functions";
import OpenAI from "openai";
import fs from "fs";
import os from "os";
import path from "path";
import axios from "axios";
import fileType from "file-type";

// Initialize the default Firebase Admin app once, at module load. No options
// are passed here, so configuration is left to the SDK's defaults. The
// `admin.firestore()` call in the trigger below relies on this having run.
admin.initializeApp();

/**
 * Streams the resource at `url` into a file in the OS temporary directory.
 *
 * Appears to be an earlier revision of `fetchFile`; nothing in the visible part
 * of this file calls it.
 *
 * The file extension is copied from the URL's path component (it is empty when
 * the path has no extension). The caller is responsible for deleting the file.
 *
 * @param url - Absolute URL of the file to download.
 * @returns Absolute path of the file that was written.
 * @throws If the HTTP request fails, `url` cannot be parsed, or either the
 *   download stream or the file write stream reports an error.
 */
async function fetchFileOld(url: string): Promise<string> {
  const response = await axios({
    url,
    method: "GET",
    responseType: "stream",
  });

  const extension = path.extname(new URL(url).pathname);
  // Write to the temp directory rather than next to the deployed source, which
  // is not a reliable place to create files in a Cloud Functions runtime.
  const outputPath = path.join(
    os.tmpdir(),
    `output_${Date.now()}${extension}`
  );

  const stream = fs.createWriteStream(outputPath);
  await new Promise<void>((resolve, reject) => {
    // `pipe` does not close the destination when the source fails, so tear the
    // write stream down explicitly before rejecting.
    response.data.on("error", (err: Error) => {
      stream.destroy();
      reject(err);
    });
    // Without this handler a write failure (e.g. disk full, bad path) would
    // leave the promise pending forever.
    stream.on("error", reject);
    stream.on("finish", resolve);
    response.data.pipe(stream);
  });

  return outputPath;
}

/**
 * Downloads the resource at `url` into memory and saves it as an `.m4a` file in
 * the OS temporary directory.
 *
 * Appears to be an earlier revision of `fetchFile`; nothing in the visible part
 * of this file calls it. The `.m4a` extension is hard-coded regardless of the
 * actual content. The caller is responsible for deleting the file.
 *
 * @param url - Absolute URL of the file to download.
 * @returns Absolute path of the file that was written.
 * @throws If the HTTP request or the file write fails.
 */
async function fetchFileOld2(url: string): Promise<string> {
  const response = await axios({
    url,
    method: "GET",
    responseType: "arraybuffer",
  });

  // Write to the temp directory rather than next to the deployed source, which
  // is not a reliable place to create files in a Cloud Functions runtime.
  const outputPath = path.join(os.tmpdir(), `output_${Date.now()}.m4a`);

  // Asynchronous write so the event loop is not blocked while flushing to disk.
  await fs.promises.writeFile(outputPath, response.data);

  return outputPath;
}

/**
 * Downloads the resource at `url`, sniffs its type from the content, and saves
 * it to a uniquely named file in the OS temporary directory.
 *
 * The extension is taken from the detected file type; when detection yields
 * nothing the file is saved with a `.bin` extension and reported as
 * `application/octet-stream`. The caller is responsible for deleting the file.
 *
 * @param url - Absolute URL of the file to download.
 * @returns Absolute path of the file that was written.
 * @throws If the HTTP request, type detection, or file write fails.
 */
async function fetchFile(url: string): Promise<string> {
  const response = await axios({
    url,
    method: "GET",
    responseType: "arraybuffer",
  });

  // NOTE(review): `fileTypeFromBuffer` is the name used by recent `file-type`
  // releases as a named export — confirm the default import exposes it for the
  // version installed in this project.
  const type = await fileType.fileTypeFromBuffer(response.data);
  const extension = type ? type.ext : "bin";
  const mime = type ? type.mime : "application/octet-stream";

  // A millisecond timestamp alone can collide when several invocations share
  // one instance, so add a short random suffix.
  const uniqueSuffix = Math.random().toString(36).slice(2, 10);
  // Write to the temp directory rather than next to the deployed source, which
  // is not a reliable place to create files in a Cloud Functions runtime.
  const outputPath = path.join(
    os.tmpdir(),
    `output_${Date.now()}_${uniqueSuffix}.${extension}`
  );

  // Asynchronous write so the event loop is not blocked while flushing to disk.
  await fs.promises.writeFile(outputPath, response.data);

  // Use the structured Functions logger like the rest of this file.
  logger.info(`File saved as ${outputPath} with MIME type ${mime}`);

  return outputPath;
}

// Firestore Trigger for Document Creation
/**
 * Runs whenever a document is created under `notes/{noteId}`.
 *
 * Reads the new note's `audioPath` field (passed to `fetchFile`, so it is
 * treated as a downloadable URL), downloads the audio, sends it to OpenAI's
 * `whisper-1` transcription model, and writes the result back to the same note
 * as `transcript`, together with `status: "SUCCESS"` and `isReady: true`.
 *
 * Failures are logged and swallowed: the handler never rejects, and the note is
 * left unchanged when anything goes wrong.
 *
 * Environment: `OPENAI_API_KEY` is required; `OPENAI_ORG_ID` and
 * `OPENAI_PROJECT_ID` are forwarded to the client as-is.
 */
export const noteWasCreatedByUser = onDocumentCreated(
  "notes/{noteId}",
  async (event) => {
    const snapshot = event.data;
    if (!snapshot) {
      logger.error("No data associated with the event");
      return;
    }
    const data = snapshot.data();
    if (!data) {
      logger.error("No data available on the event object.");
      return;
    }
    const noteId = event.params.noteId;
    logger.info(`Note ID: ${noteId}`);

    const audioPath: unknown = data.audioPath;
    logger.info(`Audio path: ${audioPath}`);

    // Reject a missing or malformed field up front with a specific message
    // instead of letting the HTTP client fail on it later.
    if (typeof audioPath !== "string" || audioPath.length === 0) {
      logger.error(
        `Note ${noteId} has no usable audioPath; skipping transcription`
      );
      return;
    }

    // Tracked outside the try block so the finally clause can clean it up.
    let downloadedFilePath: string | undefined;

    try {
      // Check configuration before downloading so a misconfigured deployment
      // does not fetch (and store) audio it can never transcribe.
      if (!process.env.OPENAI_API_KEY) {
        throw new Error("OPENAI_API_KEY is not defined");
      }

      downloadedFilePath = await fetchFile(audioPath);

      const openai = new OpenAI({
        organization: process.env.OPENAI_ORG_ID,
        project: process.env.OPENAI_PROJECT_ID,
        apiKey: process.env.OPENAI_API_KEY,
      });

      const transcriptionResult = await openai.audio.transcriptions.create({
        file: fs.createReadStream(downloadedFilePath),
        model: "whisper-1",
      });
      const db = admin.firestore();
      await db.collection("notes").doc(noteId).update({
        transcript: transcriptionResult.text,
        status: "SUCCESS",
        isReady: true,
      });

      logger.info(`Transcription completed and saved for note ID: ${noteId}`);
    } catch (error) {
      logger.error(
        `Failed to process audio file for note ID: ${noteId}`,
        error
      );
    } finally {
      // Always remove the downloaded copy; files left behind accumulate on the
      // instance across invocations.
      if (downloadedFilePath !== undefined) {
        try {
          await fs.promises.unlink(downloadedFilePath);
        } catch (cleanupError) {
          logger.warn(
            `Failed to remove downloaded file ${downloadedFilePath}`,
            cleanupError
          );
        }
      }
    }
  }
);