// index.js
const { Configuration, OpenAIApi } = require("openai");
const express = require('express')
const bodyParser = require('body-parser')
const cors = require('cors')
require('dotenv').config()
const rateLimit = require('express-rate-limit')
const anchorme = require("anchorme").default;
const axios = require('axios');
const { encodingForModel } = require('js-tiktoken');
const tiktokenModels = [
  'text-davinci-003',
  'text-davinci-002',
  'text-davinci-001',
  'text-curie-001',
  'text-babbage-001',
  'text-ada-001',
  'davinci',
  'curie',
  'babbage',
  'ada',
  'code-davinci-002',
  'code-davinci-001',
  'code-cushman-002',
  'code-cushman-001',
  'davinci-codex',
  'cushman-codex',
  'text-davinci-edit-001',
  'code-davinci-edit-001',
  'text-embedding-ada-002',
  'text-similarity-davinci-001',
  'text-similarity-curie-001',
  'text-similarity-babbage-001',
  'text-similarity-ada-001',
  'text-search-davinci-doc-001',
  'text-search-curie-doc-001',
  'text-search-babbage-doc-001',
  'text-search-ada-doc-001',
  'code-search-babbage-code-001',
  'code-search-ada-code-001',
  'gpt2',
  'gpt-4',
  'gpt-4-0314',
  'gpt-4-32k',
  'gpt-4-32k-0314',
  'gpt-3.5-turbo',
  'gpt-3.5-turbo-0301'
];

// Open AI Configuration
// Credentials come from the environment (.env loaded above):
//   OPENAI_API_ORG — optional organization id
//   OPENAI_API_KEY — API key, also reused directly for the moderation calls below
// console.log(process.env.OPENAI_API_ORG)
const configuration = new Configuration({
  organization: process.env.OPENAI_API_ORG,
  apiKey: process.env.OPENAI_API_KEY,
});
// Shared OpenAI client (openai v3 SDK) used by all route handlers.
const openai = new OpenAIApi(configuration);

// Per-IP rate limiting applied to every route (see app.use below).
const rateLimiter = rateLimit({
  windowMs: 1000 * 60 * 1, // 1 minute (refreshTime)
  max: 3000, // limit each IP to x requests per windowMs (refreshTime)
  message: 'Sorry, too many requests. Please try again in a bit!',
});

// Express Configuration
const app = express()
const port = 3080

app.use(bodyParser.json())
app.use(cors())
app.use(require('morgan')('dev')) // request logging
app.use(rateLimiter)

// Per-request completion token budget; defaults to 512 when the env var is
// unset. Radix 10 passed explicitly to parseInt (best practice).
const max_tokens = process.env.MAX_TOKENS_chatbot_plus ? parseInt(process.env.MAX_TOKENS_chatbot_plus, 10) : 512;
// Routing

// Primary Open AI Route — handles classic completion models; chat-style and
// open-source models are dispatched to their dedicated handlers.
app.post('/api', async (req, res) => {
  const { message, currentModel, temperature } = req.body;

  // Chat models use the chat-completions endpoint and a message-array payload.
  if (currentModel === "gpt-3.5-turbo" || currentModel === "gpt-3.5-turbo-0301") {
    runGPTTurbo(req, res);
    return;
  }

  // Self-hosted open-source models are proxied to their configured endpoints.
  if (currentModel === "openchat_3.5" || currentModel === "zephyr-7B-beta") {
    runOpensource(req, res);
    return;
  }

  // Prepend a greeting; slightly different wording when the user greets us.
  let greetingPrompt = 'Hello, how can I assist you?'
  const greetings = ['hi', 'hello', 'hey']
  if (greetings.some((greeting) => message.toLowerCase().includes(greeting))) {
    greetingPrompt = 'Hello, how can I help you today?'
  }
  let query_prompt = `${greetingPrompt}\n${message}`;

  // Very long prompts (>= 800 whitespace-separated words): keep only the last
  // two newline-separated segments to fit the model's context window.
  // (const/let added — the originals were implicit globals.)
  const strLength = message.split(' ').length;
  if (strLength >= 800) {
    const arrBody = message.split("\n");
    if (arrBody.length >= 4) {
      // Equivalent to the original splice loop: retain the final two segments.
      query_prompt = arrBody.slice(-2).join("\n");
    }
  }

  try {
    // Reject content flagged by OpenAI's moderation endpoint before spending
    // completion tokens. (Moved inside try: a failed moderation call was
    // previously an unhandled rejection.)
    const moderation = await axios.post("https://api.openai.com/v1/moderations", {
      input: query_prompt
    }, { headers: { 'content-type': 'application/json', 'Authorization': `Bearer ${process.env.OPENAI_API_KEY}` } });

    if (moderation.data.results[0].flagged) {
      res.json({
        success: false,
        message: "I'm sorry, but I can't assist with that. We want everyone to use our tool safely and responsibly.\nIf you have any other questions or need advice on a different topic, feel free to ask."
      });
      return;
    }

    const response = await openai.createCompletion({
      model: `${currentModel}`, // e.g. "text-davinci-003"
      prompt: query_prompt,
      max_tokens: max_tokens,
      temperature,
    });
    const input = response.data.choices[0].text;

    // Best-effort token accounting; models unknown to js-tiktoken fall back
    // to the gpt-3.5-turbo tokenizer. Failures here are non-fatal.
    const usage = {};
    try {
      const enc = encodingForModel(tiktokenModels.includes(currentModel) ? currentModel : 'gpt-3.5-turbo');
      usage.prompt_tokens = enc.encode(query_prompt).length;
      usage.completion_tokens = enc.encode(input).length;
      usage.total_tokens = usage.prompt_tokens + usage.completion_tokens;
    } catch (e) {
      console.log('Error encoding prompt text', e);
    }

    res.json({
      usage: usage,
      // Linkify URLs in the model output; links open in a new tab.
      message: anchorme({
        input,
        options: {
          attributes: {
            target: "_blank"
          },
        }
      })
    })
  } catch (e) {
    // e.response is absent on network-level failures — guard with optional
    // chaining (the original dereferenced it unconditionally and could crash).
    const error_msg = e.response?.data?.error?.message ?? '';
    if (error_msg.indexOf('maximum context length') >= 0) {
      res.json({
        message: "The output for your prompt is too long for us to process. Please reduce your prompt and try again.",
      })
    } else {
      // Always answer the client — the original sent nothing here, leaving
      // the request to hang until the client timed out.
      res.status(500).json({
        message: "Something went wrong while processing your request. Please try again.",
      })
    }
  }
});

// Handles chat-completion models ("gpt-3.5-turbo" family).
// req.body.message is a JSON-encoded array of {role, content} chat messages;
// the last entry's content is treated as the current user prompt.
// Always responds via `res` — never throws to the caller.
async function runGPTTurbo(req, res) {
  const { message, currentModel, temperature } = req.body;
  const messageHistory = JSON.parse(message); // parse once, reuse everywhere
  const query_prompt = messageHistory.length ? messageHistory[messageHistory.length - 1].content : "";
  let input = '';

  // Moderation gate: reject flagged content before calling the model.
  let moderation;
  try {
    moderation = await axios.post("https://api.openai.com/v1/moderations", {
      input: query_prompt
    }, { headers: { 'content-type': 'application/json', 'Authorization': `Bearer ${process.env.OPENAI_API_KEY}` } });
  } catch (e) {
    // The original let this rejection escape unhandled; fail the request cleanly.
    res.status(500).json({
      message: "Something went wrong while processing your request. Please try again.",
    });
    return;
  }

  if (moderation.data.results[0].flagged) {
    res.json({
      success: false,
      message: "I'm sorry, but I can't assist with that. We want everyone to use our tool safely and responsibly.\nIf you have any other questions or need advice on a different topic, feel free to ask."
    });
    return;
  }

  try {
    const response = await openai.createChatCompletion({
      model: `${currentModel}`,
      messages: messageHistory,
      max_tokens: max_tokens,
      temperature
    });
    input = response.data.choices[0].message.content
  } catch (e) {
    // e.response may be undefined on network failures — guard with optional
    // chaining (the original dereferenced it unconditionally and could crash).
    const error_msg = e.response?.data?.error?.message ?? '';
    if (error_msg.indexOf('maximum context length') >= 0) {
      input = "The output for your prompt is too long for us to process. Please reduce your prompt and try again.";
    }
    // Other errors fall through with an empty reply, matching the original behavior.
  }

  // Best-effort token accounting; unknown models fall back to gpt-3.5-turbo.
  const usage = {};
  try {
    const enc = encodingForModel(tiktokenModels.includes(currentModel) ? currentModel : 'gpt-3.5-turbo');
    usage.prompt_tokens = enc.encode(query_prompt).length;
    usage.completion_tokens = enc.encode(input).length;
    usage.total_tokens = usage.prompt_tokens + usage.completion_tokens;
  } catch (e) {
    console.log('Error encoding prompt text', e);
  }

  res.json({
    prompt: messageHistory,
    usage: usage,
    // Linkify URLs in the model output; links open in a new tab.
    message: anchorme({
      input,
      options: {
        attributes: {
          target: "_blank"
        },
      }
    })
  });
}

// Resolves the base API URL for an open-source model.
// OPENSOURCE_ENDPOINTS is a JSON object mapping model name -> base URL.
// Returns undefined when the env var is unset, unparsable, or has no entry
// for the model (the original threw on a missing/invalid env var).
const get_endpoint_api_url = (currentModel) => {
  const OPENSOURCE_ENDPOINTS = process.env.OPENSOURCE_ENDPOINTS;
  if (!OPENSOURCE_ENDPOINTS) return undefined;
  try {
    const endpoints = JSON.parse(OPENSOURCE_ENDPOINTS);
    return endpoints?.[currentModel];
  } catch (e) {
    console.log('Invalid OPENSOURCE_ENDPOINTS JSON', e);
    return undefined;
  }
}
// Resolves the bearer token for an open-source model.
// OPENSOURCE_API_KEY is a JSON object mapping model name -> API key.
// Returns undefined when the env var is unset, unparsable, or has no entry
// for the model (the original threw on a missing/invalid env var).
const get_endpoint_api_key = (currentModel) => {
  const OPENSOURCE_API_KEY = process.env.OPENSOURCE_API_KEY;
  if (!OPENSOURCE_API_KEY) return undefined;
  try {
    const api_keys = JSON.parse(OPENSOURCE_API_KEY);
    return api_keys?.[currentModel];
  } catch (e) {
    console.log('Invalid OPENSOURCE_API_KEY JSON', e);
    return undefined;
  }
}
// Proxies chat requests to a self-hosted open-source model endpoint
// (OpenAI-compatible /chat/completions API). req.body.message is a
// JSON-encoded array of {role, content} messages.
// Always responds via `res` — never throws to the caller.
async function runOpensource(req, res) {
  const { message, currentModel, temperature } = req.body;
  const messageHistory = JSON.parse(message); // parse once, reuse everywhere
  const query_prompt = messageHistory.length ? messageHistory[messageHistory.length - 1].content : "";
  let input = '';

  try {
    const endpoint_api_url = get_endpoint_api_url(currentModel);
    const api_key = get_endpoint_api_key(currentModel);
    // Plain await + try/catch. The original mixed `await` with `.catch()`,
    // and its .catch handler dereferenced error.response.statusText
    // unconditionally — crashing on network-level failures.
    const response = await axios.post(endpoint_api_url + '/chat/completions', {
      messages: messageHistory,
      temperature
    }, {
      headers: {
        'Content-Type': 'application/json',
        'Authorization': 'Bearer ' + api_key
      },
    });
    input = response.data.choices[0].message.content
  } catch (e) {
    // Prefer the API's structured error message, then the HTTP status text.
    const error_msg = e.response?.data?.error?.message ?? e.response?.statusText ?? '';
    if (error_msg.indexOf('maximum context length') >= 0) {
      input = "The output for your prompt is too long for us to process. Please reduce your prompt and try again.";
    } else if (error_msg !== '') {
      // Matches the original behavior of surfacing the status text.
      input = "Error: " + error_msg;
    }
    // Unidentifiable errors fall through with an empty reply.
  }

  // Best-effort token accounting using the gpt-3.5-turbo tokenizer as an
  // approximation for open-source models.
  const usage = {};
  try {
    const enc = encodingForModel('gpt-3.5-turbo');
    usage.prompt_tokens = enc.encode(query_prompt).length;
    usage.completion_tokens = enc.encode(input).length;
    usage.total_tokens = usage.prompt_tokens + usage.completion_tokens;
  } catch (e) {
    console.log('Error encoding prompt text', e);
  }

  res.json({
    prompt: messageHistory,
    usage: usage,
    // Linkify URLs in the model output; links open in a new tab.
    message: anchorme({
      input,
      options: {
        attributes: {
          target: "_blank"
        },
      }
    })
  });
}



// Get Models Route — lists available OpenAI engines (gpt-4 variants filtered
// out) plus any configured open-source models flagged as beta.
app.get('/models', async (req, res) => {
  try {
    const response = await openai.listEngines();
    const models = response.data;
    // OPENSOURCE_MODELS is a comma-separated list of model ids.
    const opensource_models = process.env.OPENSOURCE_MODELS ? process.env.OPENSOURCE_MODELS.split(',') : [];

    // Hide gpt-4 variants from the model selector.
    models.data = models.data.filter((model) => {
      return model.id.indexOf('gpt-4') < 0
    })

    opensource_models.forEach((model) => {
      models.data.push({
        id: model,
        beta: true,
      });
    })

    res.json({
      models
    })
  } catch (e) {
    // The original had no error handling: a failed listEngines() call became
    // an unhandled rejection and the client request hung.
    res.status(500).json({
      message: 'Unable to fetch the model list. Please try again later.',
    })
  }
});

// Boot the HTTP server and announce the local URL once it is listening.
app.listen(port, () =>
  console.log(`Example app listening at http://localhost:${port}`)
);