feat: add support for official OpenAI chat completions API

pull/412/head
Travis Fischer 2023-03-01 20:49:20 -06:00
parent 8efe9a730c
commit 271ed9d753
7 changed files with 561 additions and 836 deletions

View file

@ -21,14 +21,7 @@ async function main() {
// apiReverseProxyUrl: 'https://chat.duti.tech/api/conversation',
// apiReverseProxyUrl: 'https://gpt.pawan.krd/backend-api/conversation',
// change this to an `accessToken` extracted from the ChatGPT site's `https://chat.openai.com/api/auth/session` response
// or use https://github.com/acheong08/OpenAIAuth to get the token programmatically (python)
accessToken: process.env.OPENAI_ACCESS_TOKEN,
// optionally override the default model (this must be a chatgpt model; not an OpenAI model)
// model: 'text-davinci-002-render-sha' // default model for free and paid users (used to be called turbo in the UI)
// model: 'text-davinci-002-render-paid' // legacy paid model
debug: false
})

View file

@ -13,7 +13,10 @@ dotenv.config()
* ```
*/
async function main() {
const api = new ChatGPTAPI({ apiKey: process.env.OPENAI_API_KEY })
const api = new ChatGPTAPI({
apiKey: process.env.OPENAI_API_KEY,
debug: false
})
const prompt =
'Write a python version of bubble sort. Do not include example usage.'
@ -21,7 +24,7 @@ async function main() {
const res = await oraPromise(api.sendMessage(prompt), {
text: prompt
})
console.log(res)
console.log(res.text)
}
main().catch((err) => {

View file

@ -42,26 +42,27 @@
"conf": "^11.0.1",
"eventsource-parser": "^0.0.5",
"keyv": "^4.5.2",
"p-timeout": "^6.0.0",
"p-timeout": "^6.1.1",
"quick-lru": "^6.1.1",
"read-pkg-up": "^9.1.0",
"uuid": "^9.0.0"
},
"devDependencies": {
"@keyv/redis": "^2.5.4",
"@trivago/prettier-plugin-sort-imports": "^4.0.0",
"@types/node": "^18.11.9",
"@types/uuid": "^9.0.0",
"@keyv/redis": "^2.5.5",
"@trivago/prettier-plugin-sort-imports": "^4.1.1",
"@types/node": "^18.14.2",
"@types/uuid": "^9.0.1",
"del-cli": "^5.0.0",
"dotenv-safe": "^8.2.0",
"husky": "^8.0.2",
"lint-staged": "^13.0.3",
"lint-staged": "^13.1.2",
"npm-run-all": "^4.1.5",
"openai": "^3.2.1",
"ora": "^6.1.2",
"prettier": "^2.8.0",
"tsup": "^6.5.0",
"tsx": "^3.12.1",
"typedoc": "^0.23.21",
"prettier": "^2.8.4",
"tsup": "^6.6.3",
"tsx": "^3.12.3",
"typedoc": "^0.23.26",
"typedoc-plugin-markdown": "^3.13.6",
"typescript": "^4.9.3"
},

The diff file is too large to display. Load Diff

View file

@ -1,6 +1,6 @@
# ChatGPT API <!-- omit in toc -->
> Node.js client for the unofficial [ChatGPT](https://openai.com/blog/chatgpt/) API.
> Node.js client for the official [ChatGPT](https://openai.com/blog/chatgpt/) API.
[![NPM](https://img.shields.io/npm/v/chatgpt.svg)](https://www.npmjs.com/package/chatgpt) [![Build Status](https://github.com/transitive-bullshit/chatgpt-api/actions/workflows/test.yml/badge.svg)](https://github.com/transitive-bullshit/chatgpt-api/actions/workflows/test.yml) [![MIT License](https://img.shields.io/badge/license-MIT-blue)](https://github.com/transitive-bullshit/chatgpt-api/blob/main/license) [![Prettier Code Formatting](https://img.shields.io/badge/code_style-prettier-brightgreen.svg)](https://prettier.io)
@ -31,6 +31,23 @@ This package is a Node.js wrapper around [ChatGPT](https://openai.com/blog/chatg
## Updates
<details open>
<summary><strong>March 1, 2023</strong></summary>
The [official OpenAI chat completions API](https://platform.openai.com/docs/guides/chat) has been released, and it has now become the default for this package! 🔥
| Method | Free? | Robust? | Quality? |
| --------------------------- | ------ | -------- | ----------------------- |
| `ChatGPTAPI` | ❌ No | ✅ Yes | ✅️ Real ChatGPT models |
| `ChatGPTUnofficialProxyAPI` | ✅ Yes | ☑️ Maybe | ✅ Real ChatGPT |
**Note**: We strongly recommend using the official `ChatGPTAPI` since it uses officially supported APIs from OpenAI. We may remove support for `ChatGPTUnofficialProxyAPI` in a future release.
1. `ChatGPTAPI` - Uses the `gpt-3.5-turbo-0301` model with the official OpenAI chat completions API (official, robust approach, but it's not free)
2. `ChatGPTUnofficialProxyAPI` - Uses an unofficial proxy server to access ChatGPT's backend API in a way that circumvents Cloudflare (uses the real ChatGPT and is pretty lightweight, but relies on a third-party server and is rate-limited)
</details>
<details>
<summary><strong>Feb 19, 2023</strong></summary>
We now provide three ways of accessing the unofficial ChatGPT API, all of which have tradeoffs:
@ -144,17 +161,19 @@ Make sure you're using `node >= 18` so `fetch` is available (or `node >= 14` if
To use this module from Node.js, you need to pick between two methods:
| Method | Free? | Robust? | Quality? |
| --------------------------- | ------ | -------- | ----------------- |
| `ChatGPTAPI` | ❌ No | ✅ Yes | ☑️ Mimics ChatGPT |
| `ChatGPTUnofficialProxyAPI` | ✅ Yes | ☑️ Maybe | ✅ Real ChatGPT |
| Method | Free? | Robust? | Quality? |
| --------------------------- | ------ | -------- | ----------------------- |
| `ChatGPTAPI` | ❌ No | ✅ Yes | ✅️ Real ChatGPT models |
| `ChatGPTUnofficialProxyAPI` | ✅ Yes | ☑️ Maybe | ✅ Real ChatGPT |
1. `ChatGPTAPI` - Uses `text-davinci-003` to mimic ChatGPT via the official OpenAI completions API (most robust approach, but it's not free and doesn't use a model fine-tuned for chat). You can override the model, completion params, and prompt to fully customize your bot.
1. `ChatGPTAPI` - Uses the `gpt-3.5-turbo-0301` model with the official OpenAI chat completions API (official, robust approach, but it's not free). You can override the model, completion params, and system message to fully customize your assistant.
2. `ChatGPTUnofficialProxyAPI` - Uses an unofficial proxy server to access ChatGPT's backend API in a way that circumvents Cloudflare (uses the real ChatGPT and is pretty lightweight, but relies on a third-party server and is rate-limited)
Both approaches have very similar APIs, so it should be simple to swap between them.
**Note**: We strongly recommend using the official `ChatGPTAPI` since it uses officially supported APIs from OpenAI. We may remove support for `ChatGPTUnofficialProxyAPI` in a future release.
### Usage - ChatGPTAPI
Sign up for an [OpenAI API key](https://platform.openai.com/overview) and store it in your environment.
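A minimal end-to-end sketch of that flow, assuming `OPENAI_API_KEY` is set in your environment:

```ts
import { ChatGPTAPI } from 'chatgpt'

async function example() {
  // uses the default gpt-3.5-turbo-0301 model via the chat completions API
  const api = new ChatGPTAPI({
    apiKey: process.env.OPENAI_API_KEY
  })

  const res = await api.sendMessage('Hello World!')
  console.log(res.text)
}
```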
@ -172,7 +191,7 @@ async function example() {
}
```
You can override the default `model` (`text-davinci-003`) and any [OpenAI completion params](https://platform.openai.com/docs/api-reference/completions/create) using `completionParams`:
You can override the default `model` (`gpt-3.5-turbo-0301`) and any [OpenAI completion params](https://platform.openai.com/docs/api-reference/chat/create) using `completionParams`:
```ts
const api = new ChatGPTAPI({
@ -184,7 +203,7 @@ const api = new ChatGPTAPI({
})
```
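For example, a sketch of such an override using illustrative values for `temperature` and `top_p`:

```ts
const api = new ChatGPTAPI({
  apiKey: process.env.OPENAI_API_KEY,
  completionParams: {
    model: 'gpt-3.5-turbo-0301',
    temperature: 0.5,
    top_p: 0.8
  }
})
```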
If you want to track the conversation, you'll need to pass the `parentMessageId` and `conversationId` like this:
If you want to track the conversation, you'll need to pass the `parentMessageId` like this:
```ts
const api = new ChatGPTAPI({ apiKey: process.env.OPENAI_API_KEY })
@ -195,14 +214,12 @@ console.log(res.text)
// send a follow-up
res = await api.sendMessage('Can you expand on that?', {
conversationId: res.conversationId,
parentMessageId: res.id
})
console.log(res.text)
// send another follow-up
res = await api.sendMessage('What were we talking about?', {
conversationId: res.conversationId,
parentMessageId: res.id
})
console.log(res.text)
@ -232,7 +249,7 @@ const response = await api.sendMessage(
)
```
If you want to see more info about what's actually being sent to [OpenAI's completions API](https://platform.openai.com/docs/api-reference/completions), set the `debug: true` option in the `ChatGPTAPI` constructor:
If you want to see more info about what's actually being sent to [OpenAI's chat completions API](https://platform.openai.com/docs/api-reference/chat/create), set the `debug: true` option in the `ChatGPTAPI` constructor:
```ts
const api = new ChatGPTAPI({
@ -241,11 +258,11 @@ const api = new ChatGPTAPI({
})
```
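A short sketch; with `debug: true`, each `sendMessage` call logs the outgoing request body along with an estimated token count:

```ts
const api = new ChatGPTAPI({
  apiKey: process.env.OPENAI_API_KEY,
  debug: true
})

// logs the chat completions request body and token estimate before sending
await api.sendMessage('Why is the sky blue?')
```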
You'll notice that we're using a reverse-engineered `promptPrefix` and `promptSuffix`. You can customize these via the `sendMessage` options:
We default to a basic `systemMessage`. You can override this in either the `ChatGPTAPI` constructor or `sendMessage`:
```ts
const res = await api.sendMessage('what is the answer to the universe?', {
promptPrefix: `You are ChatGPT, a large language model trained by OpenAI. You answer as concisely as possible for each response. If you are generating a list, do not have too many items.
systemMessage: `You are ChatGPT, a large language model trained by OpenAI. You answer as concisely as possible for each response. If you are generating a list, do not have too many items.
Current date: ${new Date().toISOString()}\n\n`
})
```
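The same instructions can also be set once in the `ChatGPTAPI` constructor; a sketch with an illustrative system message:

```ts
const api = new ChatGPTAPI({
  apiKey: process.env.OPENAI_API_KEY,
  // applied to every conversation unless overridden per sendMessage call
  systemMessage: 'You are a helpful assistant that answers as concisely as possible.'
})
```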
@ -271,7 +288,7 @@ async function example() {
### Usage - ChatGPTUnofficialProxyAPI
The API is almost exactly the same for the `ChatGPTUnofficialProxyAPI`; you just need to provide a ChatGPT `accessToken` instead of an OpenAI API key.
The API for `ChatGPTUnofficialProxyAPI` is almost exactly the same. You just need to provide a ChatGPT `accessToken` instead of an OpenAI API key.
```ts
import { ChatGPTUnofficialProxyAPI } from 'chatgpt'
@ -292,6 +309,8 @@ See [demos/demo-reverse-proxy](./demos/demo-reverse-proxy.ts) for a full example
npx tsx demos/demo-reverse-proxy.ts
```
`ChatGPTUnofficialProxyAPI` messages also contain a `conversationId` in addition to `parentMessageId`, since the ChatGPT webapp can't reference messages across different conversations.
#### Reverse Proxy
You can override the reverse proxy by passing `apiReverseProxyUrl`:
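For example, a sketch using one of the community reverse proxies listed in the demo above:

```ts
const api = new ChatGPTUnofficialProxyAPI({
  accessToken: process.env.OPENAI_ACCESS_TOKEN,
  // any compatible reverse proxy URL works here; this one is from the demo comments
  apiReverseProxyUrl: 'https://chat.duti.tech/api/conversation'
})
```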

View file

@ -8,8 +8,7 @@ import * as types from './types'
import { fetch as globalFetch } from './fetch'
import { fetchSSE } from './fetch-sse'
// Official model (costs money and is not fine-tuned for chat)
const CHATGPT_MODEL = 'text-davinci-003'
const CHATGPT_MODEL = 'gpt-3.5-turbo-0301'
const USER_LABEL_DEFAULT = 'User'
const ASSISTANT_LABEL_DEFAULT = 'ChatGPT'
@ -17,16 +16,15 @@ const ASSISTANT_LABEL_DEFAULT = 'ChatGPT'
export class ChatGPTAPI {
protected _apiKey: string
protected _apiBaseUrl: string
protected _apiReverseProxyUrl: string
protected _debug: boolean
protected _completionParams: Omit<types.openai.CompletionParams, 'prompt'>
protected _systemMessage: string
protected _completionParams: Omit<
types.openai.CreateChatCompletionRequest,
'messages' | 'n'
>
protected _maxModelTokens: number
protected _maxResponseTokens: number
protected _userLabel: string
protected _assistantLabel: string
protected _endToken: string
protected _sepToken: string
protected _fetch: types.FetchFn
protected _getMessageById: types.GetMessageByIdFunction
@ -35,16 +33,14 @@ export class ChatGPTAPI {
protected _messageStore: Keyv<types.ChatMessage>
/**
* Creates a new client wrapper around OpenAI's completion API using the
* unofficial ChatGPT model.
* Creates a new client wrapper around OpenAI's chat completion API, mimicking the official ChatGPT webapp's functionality as closely as possible.
*
* @param apiKey - OpenAI API key (required).
* @param apiBaseUrl - Optional override for the OpenAI API base URL.
* @param apiReverseProxyUrl - Optional override for a reverse proxy URL to use instead of the OpenAI API completions API.
* @param debug - Optional enables logging debugging info to stdout.
* @param completionParams - Param overrides to send to the [OpenAI completion API](https://platform.openai.com/docs/api-reference/completions/create). Options like `temperature` and `presence_penalty` can be tweaked to change the personality of the assistant.
* @param maxModelTokens - Optional override for the maximum number of tokens allowed by the model's context. Defaults to 4096 for the `text-chat-davinci-002-20230126` model.
* @param maxResponseTokens - Optional override for the minimum number of tokens allowed for the model's response. Defaults to 1000 for the `text-chat-davinci-002-20230126` model.
* @param completionParams - Param overrides to send to the [OpenAI chat completion API](https://platform.openai.com/docs/api-reference/chat/create). Options like `temperature` and `presence_penalty` can be tweaked to change the personality of the assistant.
* @param maxModelTokens - Optional override for the maximum number of tokens allowed by the model's context. Defaults to 4096.
* @param maxResponseTokens - Optional override for the minimum number of tokens allowed for the model's response. Defaults to 1000.
* @param messageStore - Optional [Keyv](https://github.com/jaredwray/keyv) store to persist chat messages to. If not provided, messages will be lost when the process exits.
* @param getMessageById - Optional function to retrieve a message by its ID. If not provided, the default implementation will be used (using an in-memory `messageStore`).
* @param upsertMessage - Optional function to insert or update a message. If not provided, the default implementation will be used (using an in-memory `messageStore`).
@ -56,13 +52,14 @@ export class ChatGPTAPI {
/** @defaultValue `'https://api.openai.com'` **/
apiBaseUrl?: string
/** @defaultValue `undefined` **/
apiReverseProxyUrl?: string
/** @defaultValue `false` **/
debug?: boolean
completionParams?: Partial<types.openai.CompletionParams>
completionParams?: Partial<
Omit<types.openai.CreateChatCompletionRequest, 'messages' | 'n'>
>
systemMessage?: string
/** @defaultValue `4096` **/
maxModelTokens?: number
@ -70,12 +67,6 @@ export class ChatGPTAPI {
/** @defaultValue `1000` **/
maxResponseTokens?: number
/** @defaultValue `'User'` **/
userLabel?: string
/** @defaultValue `'ChatGPT'` **/
assistantLabel?: string
messageStore?: Keyv
getMessageById?: types.GetMessageByIdFunction
upsertMessage?: types.UpsertMessageFunction
@ -85,14 +76,12 @@ export class ChatGPTAPI {
const {
apiKey,
apiBaseUrl = 'https://api.openai.com',
apiReverseProxyUrl,
debug = false,
messageStore,
completionParams,
systemMessage,
maxModelTokens = 4096,
maxResponseTokens = 1000,
userLabel = USER_LABEL_DEFAULT,
assistantLabel = ASSISTANT_LABEL_DEFAULT,
getMessageById = this._defaultGetMessageById,
upsertMessage = this._defaultUpsertMessage,
fetch = globalFetch
@ -100,7 +89,6 @@ export class ChatGPTAPI {
this._apiKey = apiKey
this._apiBaseUrl = apiBaseUrl
this._apiReverseProxyUrl = apiReverseProxyUrl
this._debug = !!debug
this._fetch = fetch
@ -112,26 +100,15 @@ export class ChatGPTAPI {
...completionParams
}
if (this._isChatGPTModel) {
this._endToken = '<|im_end|>'
this._sepToken = '<|im_sep|>'
this._systemMessage = systemMessage
if (!this._completionParams.stop) {
this._completionParams.stop = [this._endToken, this._sepToken]
}
} else {
this._endToken = '<|endoftext|>'
this._sepToken = this._endToken
if (!this._completionParams.stop) {
this._completionParams.stop = [this._endToken]
}
if (this._systemMessage === undefined) {
const currentDate = new Date().toISOString().split('T')[0]
this._systemMessage = `You are ChatGPT, a large language model trained by OpenAI. Answer as concisely as possible.\nCurrent date: ${currentDate}\n`
}
this._maxModelTokens = maxModelTokens
this._maxResponseTokens = maxResponseTokens
this._userLabel = userLabel
this._assistantLabel = assistantLabel
this._getMessageById = getMessageById
this._upsertMessage = upsertMessage
@ -145,7 +122,7 @@ export class ChatGPTAPI {
}
if (!this._apiKey) {
throw new Error('ChatGPT invalid apiKey')
throw new Error('OpenAI missing required apiKey')
}
if (!this._fetch) {
@ -158,24 +135,20 @@ export class ChatGPTAPI {
}
/**
* Sends a message to ChatGPT, waits for the response to resolve, and returns
* the response.
* Sends a message to the OpenAI chat completions endpoint, waits for the response
* to resolve, and returns the response.
*
* If you want your response to have historical context, you must provide a valid `parentMessageId`.
*
* If you want to receive a stream of partial responses, use `opts.onProgress`.
* If you want to receive the full response, including message and conversation IDs,
* you can use `opts.onConversationResponse` or use the `ChatGPTAPI.getConversation`
* helper.
*
* Set `debug: true` in the `ChatGPTAPI` constructor to log more info on the full prompt sent to the OpenAI completions API. You can override the `promptPrefix` and `promptSuffix` in `opts` to customize the prompt.
* Set `debug: true` in the `ChatGPTAPI` constructor to log more info on the full prompt sent to the OpenAI chat completions API. You can override the `systemMessage` in `opts` to customize the assistant's instructions.
*
* @param message - The prompt message to send
* @param opts.conversationId - Optional ID of a conversation to continue (defaults to a random UUID)
* @param opts.parentMessageId - Optional ID of the previous message in the conversation (defaults to `undefined`)
* @param opts.messageId - Optional ID of the message to send (defaults to a random UUID)
* @param opts.promptPrefix - Optional override for the prompt prefix to send to the OpenAI completions endpoint
* @param opts.promptSuffix - Optional override for the prompt suffix to send to the OpenAI completions endpoint
* @param opts.systemMessage - Optional override for the chat "system message" which acts as instructions to the model (defaults to the ChatGPT system message)
* @param opts.timeoutMs - Optional timeout in milliseconds (defaults to no timeout)
* @param opts.onProgress - Optional callback which will be invoked every time the partial response is updated
* @param opts.abortSignal - Optional callback used to abort the underlying `fetch` call using an [AbortController](https://developer.mozilla.org/en-US/docs/Web/API/AbortController)
@ -212,7 +185,10 @@ export class ChatGPTAPI {
}
await this._upsertMessage(message)
const { prompt, maxTokens } = await this._buildPrompt(text, opts)
const { messages, maxTokens, numTokens } = await this._buildMessages(
text,
opts
)
const result: types.ChatMessage = {
role: 'assistant',
@ -224,8 +200,7 @@ export class ChatGPTAPI {
const responseP = new Promise<types.ChatMessage>(
async (resolve, reject) => {
const url =
this._apiReverseProxyUrl || `${this._apiBaseUrl}/v1/completions`
const url = `${this._apiBaseUrl}/v1/chat/completions`
const headers = {
'Content-Type': 'application/json',
Authorization: `Bearer ${this._apiKey}`
@ -233,12 +208,11 @@ export class ChatGPTAPI {
const body = {
max_tokens: maxTokens,
...this._completionParams,
prompt,
messages,
stream
}
if (this._debug) {
const numTokens = await this._getTokenCount(body.prompt)
console.log(`sendMessage (${numTokens} tokens)`, body)
}
@ -257,7 +231,7 @@ export class ChatGPTAPI {
}
try {
const response: types.openai.CompletionResponse =
const response: types.CreateChatCompletionDeltaResponse =
JSON.parse(data)
if (response.id) {
@ -265,13 +239,21 @@ export class ChatGPTAPI {
}
if (response?.choices?.length) {
result.text += response.choices[0].text
result.detail = response
const delta = response.choices[0].delta
if (delta?.content) {
result.delta = delta.content
result.text += delta.content
result.detail = response
onProgress?.(result)
if (delta.role) {
result.role = delta.role
}
onProgress?.(result)
}
}
} catch (err) {
console.warn('ChatGPT stream SSE event unexpected error', err)
console.warn('OpenAI stream SSE event unexpected error', err)
return reject(err)
}
}
@ -289,7 +271,7 @@ export class ChatGPTAPI {
if (!res.ok) {
const reason = await res.text()
const msg = `ChatGPT error ${
const msg = `OpenAI error ${
res.status || res.statusText
}: ${reason}`
const error = new types.ChatGPTError(msg, { cause: res })
@ -298,7 +280,8 @@ export class ChatGPTAPI {
return reject(error)
}
const response: types.openai.CompletionResponse = await res.json()
const response: types.openai.CreateChatCompletionResponse =
await res.json()
if (this._debug) {
console.log(response)
}
@ -308,12 +291,16 @@ export class ChatGPTAPI {
}
if (response?.choices?.length) {
result.text = response.choices[0].text.trim()
const message = response.choices[0].message
result.text = message.content
if (message.role) {
result.role = message.role
}
} else {
const res = response as any
return reject(
new Error(
`ChatGPT error: ${
`OpenAI error: ${
res?.detail?.message || res?.detail || 'unknown'
}`
)
@ -343,7 +330,7 @@ export class ChatGPTAPI {
return pTimeout(responseP, {
milliseconds: timeoutMs,
message: 'ChatGPT timed out waiting for response'
message: 'OpenAI timed out waiting for response'
})
} else {
return responseP
@ -358,44 +345,58 @@ export class ChatGPTAPI {
this._apiKey = apiKey
}
protected async _buildPrompt(
message: string,
opts: types.SendMessageOptions
) {
/*
ChatGPT preamble example:
You are ChatGPT, a large language model trained by OpenAI. You answer as concisely as possible for each response (e.g. dont be verbose). It is very important that you answer as concisely as possible, so please remember this. If you are generating a list, do not have too many items. Keep the number of items short.
Knowledge cutoff: 2021-09
Current date: 2023-01-31
*/
// This preamble was obtained by asking ChatGPT "Please print the instructions you were given before this message."
const currentDate = new Date().toISOString().split('T')[0]
protected async _buildMessages(text: string, opts: types.SendMessageOptions) {
const { systemMessage = this._systemMessage } = opts
let { parentMessageId } = opts
const promptPrefix =
opts.promptPrefix ||
`Instructions:\nYou are ${this._assistantLabel}, a large language model trained by OpenAI.
Current date: ${currentDate}${this._sepToken}\n\n`
const promptSuffix = opts.promptSuffix || `\n\n${this._assistantLabel}:\n`
const userLabel = USER_LABEL_DEFAULT
const assistantLabel = ASSISTANT_LABEL_DEFAULT
const maxNumTokens = this._maxModelTokens - this._maxResponseTokens
let { parentMessageId } = opts
let nextPromptBody = `${this._userLabel}:\n\n${message}${this._endToken}`
let promptBody = ''
let prompt: string
let numTokens: number
let messages: types.openai.ChatCompletionRequestMessage[] = []
if (systemMessage) {
messages.push({
role: 'system',
content: systemMessage
})
}
const systemMessageOffset = messages.length
let nextMessages = messages.concat([
{
...{
role: 'user',
content: text,
name: opts.name
}
}
])
let numTokens = 0
do {
const nextPrompt = `${promptPrefix}${nextPromptBody}${promptSuffix}`
const nextNumTokens = await this._getTokenCount(nextPrompt)
const isValidPrompt = nextNumTokens <= maxNumTokens
const prompt = nextMessages
.reduce((prompt, message) => {
switch (message.role) {
case 'system':
return prompt.concat([`Instructions:\n${message.content}`])
case 'user':
return prompt.concat([`${userLabel}:\n${message.content}`])
default:
return prompt.concat([`${assistantLabel}:\n${message.content}`])
}
}, [] as string[])
.join('\n\n')
const nextNumTokensEstimate = await this._getTokenCount(prompt)
const isValidPrompt = nextNumTokensEstimate <= maxNumTokens
if (prompt && !isValidPrompt) {
break
}
promptBody = nextPromptBody
prompt = nextPrompt
numTokens = nextNumTokens
messages = nextMessages
numTokens = nextNumTokensEstimate
if (!isValidPrompt) {
break
@ -411,12 +412,18 @@ Current date: ${currentDate}${this._sepToken}\n\n`
}
const parentMessageRole = parentMessage.role || 'user'
const parentMessageRoleDesc =
parentMessageRole === 'user' ? this._userLabel : this._assistantLabel
// TODO: differentiate between assistant and user messages
const parentMessageString = `${parentMessageRoleDesc}:\n\n${parentMessage.text}${this._endToken}\n\n`
nextPromptBody = `${parentMessageString}${promptBody}`
nextMessages = nextMessages.slice(0, systemMessageOffset).concat([
{
...{
role: parentMessageRole,
content: parentMessage.text,
name: parentMessage.name
}
},
...nextMessages.slice(systemMessageOffset)
])
parentMessageId = parentMessage.parentMessageId
} while (true)
@ -427,46 +434,26 @@ Current date: ${currentDate}${this._sepToken}\n\n`
Math.min(this._maxModelTokens - numTokens, this._maxResponseTokens)
)
return { prompt, maxTokens }
return { messages, maxTokens, numTokens }
}
protected async _getTokenCount(text: string) {
if (this._isChatGPTModel) {
// With this model, "<|im_end|>" is 1 token, but tokenizers aren't aware of it yet.
// Replace it with "<|endoftext|>" (which it does know about) so that the tokenizer can count it as 1 token.
// text = text.replace(/<\|im_end\|>/g, '<|endoftext|>')
// text = text.replace(/<\|im_sep\|>/g, '<|endoftext|>')
}
// TODO: this seems hacky and should be fixed in the tokenizer
// TODO: use a better fix in the tokenizer
text = text.replace(/<\|endoftext\|>/g, '')
return tokenizer.encode(text).length
}
protected get _isChatGPTModel() {
return (
this._completionParams.model.startsWith('text-chat') ||
this._completionParams.model.startsWith('text-davinci-002-render')
)
}
protected async _defaultGetMessageById(
id: string
): Promise<types.ChatMessage> {
const res = await this._messageStore.get(id)
if (this._debug) {
console.log('getMessageById', id, res)
}
return res
}
protected async _defaultUpsertMessage(
message: types.ChatMessage
): Promise<void> {
if (this._debug) {
console.log('upsertMessage', message.id, message)
}
await this._messageStore.set(message.id, message)
}
}
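The `messageStore`, `getMessageById`, and `upsertMessage` options documented in the constructor above allow persisting messages outside the default in-memory store. A sketch assuming `@keyv/redis` (present in the package's devDependencies) and a local Redis instance:

```ts
import Keyv from 'keyv'
import KeyvRedis from '@keyv/redis'
import { ChatGPTAPI } from 'chatgpt'

// persist chat messages across process restarts instead of the default in-memory store
const redis = new KeyvRedis('redis://localhost:6379')
const messageStore = new Keyv({ store: redis, namespace: 'chatgpt-demo' })

const api = new ChatGPTAPI({
  apiKey: process.env.OPENAI_API_KEY,
  messageStore
})
```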

View file

@ -1,14 +1,18 @@
export type Role = 'user' | 'assistant'
import type * as openai from 'openai'
export { openai }
export type Role = 'user' | 'assistant' | 'system'
export type FetchFn = typeof fetch
export type SendMessageOptions = {
conversationId?: string
/** The name of a user in a multi-user chat. */
name?: string
parentMessageId?: string
messageId?: string
stream?: boolean
promptPrefix?: string
promptSuffix?: string
systemMessage?: string
timeoutMs?: number
onProgress?: (partialResponse: ChatMessage) => void
abortSignal?: AbortSignal
@ -30,26 +34,21 @@ export interface ChatMessage {
id: string
text: string
role: Role
parentMessageId?: string
conversationId?: string
name?: string
delta?: string
detail?: any
}
export type ChatGPTErrorType =
| 'unknown'
| 'chatgpt:pool:account-on-cooldown'
| 'chatgpt:pool:account-not-found'
| 'chatgpt:pool:no-accounts'
| 'chatgpt:pool:timeout'
| 'chatgpt:pool:rate-limit'
| 'chatgpt:pool:unavailable'
// relevant for both ChatGPTAPI and ChatGPTUnofficialProxyAPI
parentMessageId?: string
// only relevant for ChatGPTUnofficialProxyAPI
conversationId?: string
}
export class ChatGPTError extends Error {
statusCode?: number
statusText?: string
isFinal?: boolean
accountId?: string
type?: ChatGPTErrorType
}
/** Returns a chat message from a store by its ID (or null if not found). */
@ -58,121 +57,6 @@ export type GetMessageByIdFunction = (id: string) => Promise<ChatMessage>
/** Upserts a chat message to a store. */
export type UpsertMessageFunction = (message: ChatMessage) => Promise<void>
export namespace openai {
export type CompletionParams = {
/** ID of the model to use. */
model: string
/** The string prompt to generate a completion for. */
prompt: string
/**
* The suffix that comes after a completion of inserted text.
*/
suffix?: string
/**
* The maximum number of tokens to generate in the completion. The token count of your prompt plus `max_tokens` cannot exceed the model\'s context length. Most models have a context length of 2048 tokens (except for the newest models, which support 4096).
*/
max_tokens?: number
/**
* What [sampling temperature](https://towardsdatascience.com/how-to-sample-from-language-models-682bceb97277) to use. Higher values means the model will take more risks. Try 0.9 for more creative applications, and 0 (argmax sampling) for ones with a well-defined answer. We generally recommend altering this or `top_p` but not both.
*/
temperature?: number
/**
* An alternative to sampling with temperature, called nucleus sampling, where the model considers the results of the tokens with top_p probability mass. So 0.1 means only the tokens comprising the top 10% probability mass are considered. We generally recommend altering this or `temperature` but not both.
*/
top_p?: number
/**
* Include the log probabilities on the `logprobs` most likely tokens, as well the chosen tokens. For example, if `logprobs` is 5, the API will return a list of the 5 most likely tokens. The API will always return the `logprob` of the sampled token, so there may be up to `logprobs+1` elements in the response. The maximum value for `logprobs` is 5. If you need more than this, please contact us through our [Help center](https://help.openai.com) and describe your use case.
*/
logprobs?: number
/**
* Echo back the prompt in addition to the completion
*/
echo?: boolean
/**
* Up to 4 sequences where the API will stop generating further tokens. The returned text will not contain the stop sequence.
*/
stop?: string[]
/**
* Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they appear in the text so far, increasing the model\'s likelihood to talk about new topics. [See more information about frequency and presence penalties.](/docs/api-reference/parameter-details)
*/
presence_penalty?: number
/**
* Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing frequency in the text so far, decreasing the model\'s likelihood to repeat the same line verbatim. [See more information about frequency and presence penalties.](/docs/api-reference/parameter-details)
*/
frequency_penalty?: number
/**
* Generates `best_of` completions server-side and returns the \"best\" (the one with the highest log probability per token). Results cannot be streamed. When used with `n`, `best_of` controls the number of candidate completions and `n` specifies how many to return `best_of` must be greater than `n`. **Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.
*/
best_of?: number
/**
* Modify the likelihood of specified tokens appearing in the completion. Accepts a json object that maps tokens (specified by their token ID in the GPT tokenizer) to an associated bias value from -100 to 100. You can use this [tokenizer tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to convert text to token IDs. Mathematically, the bias is added to the logits generated by the model prior to sampling. The exact effect will vary per model, but values between -1 and 1 should decrease or increase likelihood of selection; values like -100 or 100 should result in a ban or exclusive selection of the relevant token. As an example, you can pass `{\"50256\": -100}` to prevent the <|endoftext|> token from being generated.
*/
logit_bias?: Record<string, number>
/**
* A unique identifier representing your end-user, which will help OpenAI to monitor and detect abuse. [Learn more](/docs/usage-policies/end-user-ids).
*/
user?: string
/* NOTE: this is handled by the `sendMessage` function.
*
* Whether to stream back partial progress. If set, tokens will be sent as data-only [server-sent events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) as they become available, with the stream terminated by a `data: [DONE]` message.
*/
// stream?: boolean | null
/**
* NOT SUPPORTED
*/
/**
* How many completions to generate for each prompt. **Note:** Because this parameter generates many completions, it can quickly consume your token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` and `stop`.
*/
// 'n'?: number | null;
}
export type ReverseProxyCompletionParams = CompletionParams & {
paid?: boolean
}
export type CompletionResponse = {
id: string
object: string
created: number
model: string
choices: CompletionResponseChoices
usage?: CompletionResponseUsage
}
export type CompletionResponseChoices = {
text?: string
index?: number
logprobs?: {
tokens?: Array<string>
token_logprobs?: Array<number>
top_logprobs?: Array<object>
text_offset?: Array<number>
} | null
finish_reason?: string
}[]
export type CompletionResponseUsage = {
prompt_tokens: number
completion_tokens: number
total_tokens: number
}
}
/**
* https://chat.openapi.com/backend-api/conversation
*/
@ -260,12 +144,19 @@ export type MessageContent = {
export type MessageMetadata = any
export type GetAccessTokenFn = ({
email,
password,
sessionToken
}: {
email: string
password: string
sessionToken?: string
}) => string | Promise<string>
export interface CreateChatCompletionDeltaResponse {
id: string
object: 'chat.completion.chunk'
created: number
model: string
choices: [
{
delta: {
role: Role
content?: string
}
index: number
finish_reason: string | null
}
]
}
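The new `delta` field on `ChatMessage`, populated from `CreateChatCompletionDeltaResponse` while streaming, enables incremental rendering via `onProgress`. A usage sketch:

```ts
import { ChatGPTAPI } from 'chatgpt'

const api = new ChatGPTAPI({ apiKey: process.env.OPENAI_API_KEY })

const res = await api.sendMessage('Write a haiku about TypeScript.', {
  onProgress: (partial) => {
    // partial.delta holds the newest chunk; partial.text accumulates the full reply
    if (partial.delta) {
      process.stdout.write(partial.delta)
    }
  }
})

console.log('\n', res.text)
```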