// index.js 5.9 KB
const { Configuration, OpenAIApi } = require("openai");
const express = require('express')
const bodyParser = require('body-parser')
const cors = require('cors')
require('dotenv').config()
const rateLimit = require('express-rate-limit')
const anchorme = require("anchorme").default;
const axios = require('axios');
const tiktoken = require('@dqbd/tiktoken');
// Model names that this file passes straight to tiktoken's `encoding_for_model`.
// Wherever the list is consulted, any model name not found here falls back to
// the 'gpt-3.5-turbo' encoding. Entries are grouped by name family for reading
// only; the order is unchanged.
const tiktokenModels = [
  // text-* names
  'text-davinci-003', 'text-davinci-002', 'text-davinci-001',
  'text-curie-001', 'text-babbage-001', 'text-ada-001',

  // unversioned base names
  'davinci', 'curie', 'babbage', 'ada',

  // code-* and *-codex names
  'code-davinci-002', 'code-davinci-001',
  'code-cushman-002', 'code-cushman-001',
  'davinci-codex', 'cushman-codex',

  // *-edit names
  'text-davinci-edit-001', 'code-davinci-edit-001',

  // embedding, similarity and search names
  'text-embedding-ada-002',
  'text-similarity-davinci-001', 'text-similarity-curie-001',
  'text-similarity-babbage-001', 'text-similarity-ada-001',
  'text-search-davinci-doc-001', 'text-search-curie-doc-001',
  'text-search-babbage-doc-001', 'text-search-ada-doc-001',
  'code-search-babbage-code-001', 'code-search-ada-code-001',

  // gpt* names
  'gpt2',
  'gpt-4', 'gpt-4-0314', 'gpt-4-32k', 'gpt-4-32k-0314',
  'gpt-3.5-turbo', 'gpt-3.5-turbo-0301',
];
// Encoder factory from tiktoken; the routes below use it to count tokens.
const { encoding_for_model } = tiktoken;

// OpenAI client setup. Organization id and API key are read from the
// environment (OPENAI_API_ORG / OPENAI_API_KEY).
const { OPENAI_API_ORG, OPENAI_API_KEY } = process.env;
const configuration = new Configuration({
  organization: OPENAI_API_ORG,
  apiKey: OPENAI_API_KEY,
});
const openai = new OpenAIApi(configuration);

// Request throttle: each client IP may make up to 3000 requests per
// one-minute window; `message` is the text configured for requests over the limit.
const rateLimiter = rateLimit({
  message: 'Sorry, too many requests. Please try again in a bit!',
  max: 3000,
  windowMs: 60 * 1000,
});

// Express application and the port it will listen on.
const app = express();
const port = 3080;

// Middleware, registered in this order: JSON body parsing, CORS headers,
// request logging (morgan, 'dev' format), then rate limiting.
const requestLogger = require('morgan')('dev');
app.use(bodyParser.json());
app.use(cors());
app.use(requestLogger);
app.use(rateLimiter);

// Routing

// Primary Open AI Route
/**
 * POST /api — primary OpenAI route.
 *
 * Request body: `{ message, currentModel, temperature }`.
 *  - For the gpt-3.5-turbo models the request is handed to `runGPTTurbo`.
 *  - For every other model `message` is treated as plain text and sent to the
 *    text-completion endpoint.
 *
 * Responses:
 *  - 200 `{ message }` with the completion text passed through anchorme
 *    (links get target="_blank"), or with a "prompt too long" notice when
 *    OpenAI reports a context-length error.
 *  - 400 `{ message }` when `message` is not a string.
 *  - 500 `{ message }` for any other failure, so the client never hangs.
 */
app.post('/api', async (req, res) => {
	const { message, currentModel, temperature } = req.body;

	if (currentModel === 'gpt-3.5-turbo' || currentModel === 'gpt-3.5-turbo-0301') {
		// Await the delegate so a rejection is handled here instead of becoming
		// an unhandled promise rejection that leaves the request open.
		try {
			await runGPTTurbo(req, res);
		} catch (e) {
			console.log(e);
			if (!res.headersSent) {
				res.status(500).json({
					message: 'Sorry, something went wrong while processing your request. Please try again.',
				});
			}
		}
		return;
	}

	// Everything below calls string methods on `message`; reject anything else up front.
	if (typeof message !== 'string') {
		res.status(400).json({ message: 'Invalid request: "message" must be a string.' });
		return;
	}

	// NOTE(review): this is a substring match, so e.g. "this" also counts as a greeting.
	const greetings = ['hi', 'hello', 'hey'];
	const lowerCaseMessage = message.toLowerCase();
	const greetingPrompt = greetings.some((greeting) => lowerCaseMessage.includes(greeting))
		? 'Hello, how can I help you today?'
		: 'Hello, how can I assist you?';

	// Very long messages (>= 800 space-separated words) that span at least four
	// lines are cut down to their last two lines, without the greeting prefix.
	let queryPrompt = `${greetingPrompt}\n${message}`;
	const wordCount = message.split(' ').length;
	if (wordCount >= 800) {
		const lines = message.split('\n');
		if (lines.length >= 4) {
			queryPrompt = lines.slice(-2).join('\n');
		}
	}

	try {
		const response = await openai.createCompletion({
			model: `${currentModel}`,
			prompt: queryPrompt,
			max_tokens: 3000,
			temperature,
		});
		const input = response.data.choices[0].text;

		// Token accounting is best-effort: a failure here is logged and must not
		// fail the request.
		const usage = {};
		let enc = null;
		try {
			enc = encoding_for_model(tiktokenModels.includes(currentModel) ? currentModel : 'gpt-3.5-turbo');
			usage.prompt_tokens = enc.encode(queryPrompt).length;
			usage.completion_tokens = enc.encode(input).length;
			usage.total_tokens = usage.prompt_tokens + usage.completion_tokens;
		} catch (e) {
			console.log('Error encoding prompt text', e);
		} finally {
			// tiktoken encoders must be released explicitly once they are no longer needed.
			if (enc) {
				enc.free();
			}
		}

		// TOKEN USAGE — fire-and-forget, but with a rejection handler so an
		// unreachable usage service cannot raise an unhandled rejection.
		axios.post(`${process.env.API_URL}e/set-chat-usage`,
			{ app: 'chatbot', prompt_token: usage.prompt_tokens, total_token: usage.total_tokens },
			{ headers: { 'content-type': 'application/x-www-form-urlencoded' } }
		).catch((e) => {
			console.log('Error recording token usage', e && e.message);
		});

		res.json({
			message: anchorme({
				input,
				options: {
					attributes: {
						target: "_blank"
					},
				}
			})
		});
	} catch (e) {
		// Not every failure carries an HTTP response (network errors, bugs in
		// this handler), so read the API error message defensively.
		const apiError = e && e.response && e.response.data && e.response.data.error;
		const errorMsg = apiError && typeof apiError.message === 'string' ? apiError.message : '';

		if (errorMsg.includes('maximum context length')) {
			res.json({
				message: "The output for your prompt is too long for us to process. Please reduce your prompt and try again.",
			});
			return;
		}

		console.log(e && e.response ? e.response : e);
		if (!res.headersSent) {
			res.status(500).json({
				message: 'Sorry, something went wrong while processing your request. Please try again.',
			});
		}
	}
});

/**
 * Handles a chat request for the gpt-3.5-turbo models.
 *
 * Reads `{ message, currentModel, temperature }` from `req.body`, where
 * `message` is a JSON-encoded array of chat messages. The parsed array is sent
 * to the chat-completion endpoint, token usage is reported to the usage
 * service (best-effort), and the reply is written to `res`.
 *
 * Responses:
 *  - 200 `{ prompt, message }` with the model reply passed through anchorme
 *    (links get target="_blank"), or with a "prompt too long" notice when
 *    OpenAI reports a context-length error.
 *  - 400 `{ message }` when `message` is not a JSON-encoded array.
 *  - 500 `{ prompt, message }` with a generic notice for any other failure.
 *
 * @param {object} req Express request.
 * @param {object} res Express response.
 * @returns {Promise<void>} Resolves once the response has been written.
 */
async function runGPTTurbo(req, res) {
	const { message, currentModel, temperature } = req.body;

	// Parse once, and treat malformed input as a client error instead of
	// letting JSON.parse throw out of the handler.
	let messageHistory = null;
	try {
		messageHistory = JSON.parse(message);
	} catch (e) {
		messageHistory = null;
	}
	if (!Array.isArray(messageHistory)) {
		res.status(400).json({
			message: 'Invalid request: "message" must be a JSON-encoded array of chat messages.',
		});
		return;
	}

	// Only the most recent message is counted as the prompt for usage purposes.
	const lastMessage = messageHistory.length ? messageHistory[messageHistory.length - 1] : null;
	const queryPrompt = lastMessage && typeof lastMessage.content === 'string' ? lastMessage.content : '';

	let input = '';
	let failed = false;
	try {
		const response = await openai.createChatCompletion({
			model: `${currentModel}`,
			messages: messageHistory,
			max_tokens: 3000,
			temperature,
		});
		input = response.data.choices[0].message.content;
	} catch (e) {
		// Not every failure carries an HTTP response (e.g. network errors), so
		// read the API error message defensively.
		const apiError = e && e.response && e.response.data && e.response.data.error;
		const errorMsg = apiError && typeof apiError.message === 'string' ? apiError.message : '';
		if (errorMsg.includes('maximum context length')) {
			input = "The output for your prompt is too long for us to process. Please reduce your prompt and try again.";
		} else {
			console.log(e && e.response ? e.response : e);
			failed = true;
		}
	}

	// Token accounting is best-effort: a failure here is logged and must not
	// fail the request.
	const usage = {};
	let enc = null;
	try {
		enc = encoding_for_model(tiktokenModels.includes(currentModel) ? currentModel : 'gpt-3.5-turbo');
		usage.prompt_tokens = enc.encode(queryPrompt).length;
		usage.completion_tokens = enc.encode(input).length;
		usage.total_tokens = usage.prompt_tokens + usage.completion_tokens;
	} catch (e) {
		console.log('Error encoding prompt text', e);
	} finally {
		// tiktoken encoders must be released explicitly once they are no longer needed.
		if (enc) {
			enc.free();
		}
	}

	// TOKEN USAGE — fire-and-forget, but with a rejection handler so an
	// unreachable usage service cannot raise an unhandled rejection.
	axios.post(`${process.env.API_URL}e/set-chat-usage`,
		{ app: 'chatbot', prompt_token: usage.prompt_tokens, total_token: usage.total_tokens },
		{ headers: { 'content-type': 'application/x-www-form-urlencoded' } }
	).catch((e) => {
		console.log('Error recording token usage', e && e.message);
	});

	const replyText = failed
		? 'Sorry, something went wrong while processing your request. Please try again.'
		: input;

	res.status(failed ? 500 : 200).json({
		prompt: messageHistory,
		message: anchorme({
			input: replyText,
			options: {
				attributes: {
					target: "_blank"
				},
			}
		})
	});
}

// Get Models Route
/**
 * GET /models — lists the engines available to the configured OpenAI account.
 *
 * Responds with 200 `{ models }`, where `models` is the body returned by
 * `openai.listEngines()`, or with 500 `{ message }` when that call fails.
 */
app.get('/models', async (req, res) => {
	try {
		const response = await openai.listEngines();
		res.json({
			models: response.data
		});
	} catch (e) {
		// Without this catch a rejected call would become an unhandled promise
		// rejection and the request would never receive a response.
		console.log(e && e.response ? e.response.data : e);
		res.status(500).json({
			message: 'Unable to retrieve the list of models. Please try again.',
		});
	}
});

// Start the server
// Bind the HTTP server to `port` and log the local URL once it is listening.
app.listen(port, function onListening() {
	const address = `http://localhost:${port}`;
	console.log(`Example app listening at ${address}`);
});