c8b81840 by Jerald Lim

maxtoken

1 parent 417e15e7
......@@ -13,4 +13,6 @@ OPENSOURCE_MODELS="openchat_3.5,zephyr-7B-beta"
OPENSOURCE_ENDPOINTS={"openchat_3.5": "http://192.168.22.180:5000/v1", "zephyr-7B-beta": "http://192.168.22.180:5000/v1"}
OPENSOURCE_API_KEY={"openchat_3.5": "Q8QZG0HhS62piZSmvSR7CP3E53fY8J99Ad2wmJLdiW05cpjnnBAH5xlQlClYdqeh", "zephyr-7B-beta": "Q8QZG0HhS62piZSmvSR7CP3E53fY8J99Ad2wmJLdiW05cpjnnBAH5xlQlClYdqeh"}
REACT_APP_START_SERVER_URL= https://dev.start.ai-pro.org
\ No newline at end of file
REACT_APP_START_SERVER_URL= https://dev.start.ai-pro.org
MAX_TOKENS_chatbot_plus=100
\ No newline at end of file
......
......@@ -69,6 +69,7 @@ app.use(cors())
app.use(require('morgan')('dev'))
app.use(rateLimiter)
/**
 * Parse a completion-token cap from a raw environment string.
 *
 * The value is read as a base-10 integer. Anything that does not yield a
 * positive integer (unset, empty, non-numeric, zero, negative) is rejected
 * so an invalid cap never reaches the completion requests.
 *
 * @param {string | undefined} raw - Raw value, e.g. from `process.env`.
 * @param {number} fallback - Value returned when `raw` is not usable.
 * @returns {number} The parsed positive integer, or `fallback`.
 */
function parseMaxTokens(raw, fallback) {
  // Explicit radix: the env value is always meant to be decimal.
  const parsed = Number.parseInt(raw, 10);
  return Number.isInteger(parsed) && parsed > 0 ? parsed : fallback;
}

// Token cap used for the completion requests; configurable through
// MAX_TOKENS_chatbot_plus and defaulting to 512 when unset or invalid.
const max_tokens = parseMaxTokens(process.env.MAX_TOKENS_chatbot_plus, 512);
// Routing
// Primary Open AI Route
......@@ -119,7 +120,7 @@ app.post('/api', async (req, res) => {
const response = await openai.createCompletion({
model: `${currentModel}`,// "text-davinci-003",
prompt: query_prompt,
max_tokens: 3000,
max_tokens: max_tokens,
temperature,
});
let input = response.data.choices[0].text;
......@@ -152,7 +153,7 @@ app.post('/api', async (req, res) => {
message: "The output for your prompt is too long for us to process. Please reduce your prompt and try again.",
})
} else {
console.log(e.response);
// console.log(e.response);
}
} finally {
// console.log('We do cleanup here');
......@@ -181,7 +182,7 @@ async function runGPTTurbo(req, res) {
const response = await openai.createChatCompletion({
model: `${currentModel}`,
messages: JSON.parse(message),
max_tokens: 3000,
max_tokens: max_tokens,
temperature
});
input = response.data.choices[0].message.content
......@@ -190,7 +191,7 @@ async function runGPTTurbo(req, res) {
if (error_msg.indexOf('maximum context length') >= 0) {
input = "The output for your prompt is too long for us to process. Please reduce your prompt and try again.";
} else {
console.log(e.response);
// console.log(e.response);
}
} finally {
......@@ -266,7 +267,7 @@ async function runOpensource(req, res) {
if (error_msg.indexOf('maximum context length') >= 0) {
input = "The output for your prompt is too long for us to process. Please reduce your prompt and try again.";
} else {
console.log(e.response);
// console.log(e.response);
}
} finally {
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!