maxtoken

Jerald Lim
Showing 2 changed files with 9 additions and 6 deletions
.env-template
index.js
--- a/.env-template
View file @c8b8184
+++ b/.env-template
View file @c8b8184
@@ -13,4 +13,6 @@ OPENSOURCE_MODELS="openchat_3.5,zephyr-7B-beta"
 OPENSOURCE_ENDPOINTS={"openchat_3.5": "http://192.168.22.180:5000/v1", "zephyr-7B-beta": "http://192.168.22.180:5000/v1"}
 OPENSOURCE_API_KEY={"openchat_3.5": "Q8QZG0HhS62piZSmvSR7CP3E53fY8J99Ad2wmJLdiW05cpjnnBAH5xlQlClYdqeh", "zephyr-7B-beta": "Q8QZG0HhS62piZSmvSR7CP3E53fY8J99Ad2wmJLdiW05cpjnnBAH5xlQlClYdqeh"}

-REACT_APP_START_SERVER_URL= https://dev.start.ai-pro.org                                    
\ No newline at end of file
+REACT_APP_START_SERVER_URL= https://dev.start.ai-pro.org                                    
+
+MAX_TOKENS_chatbot_plus=100
\ No newline at end of file
--- a/index.js
View file @c8b8184
+++ b/index.js
View file @c8b8184
@@ -69,6 +69,7 @@ app.use(cors())
 app.use(require('morgan')('dev'))
 app.use(rateLimiter)

+const max_tokens = process.env.MAX_TOKENS_chatbot_plus ? parseInt(process.env.MAX_TOKENS_chatbot_plus) : 512;
 // Routing

 // Primary Open AI Route
@@ -119,7 +120,7 @@ app.post('/api', async (req, res) => {
    const response = await openai.createCompletion({
      model: `${currentModel}`,// "text-davinci-003",
      prompt: query_prompt,
-      max_tokens: 3000,
+      max_tokens: max_tokens,
      temperature,
    });
    let input = response.data.choices[0].text;
@@ -152,7 +153,7 @@ app.post('/api', async (req, res) => {
        message: "The output for your prompt is too long for us to process. Please reduce your prompt and try again.",
      })
    } else {
-      console.log(e.response);
+      // console.log(e.response);
    }
  } finally {
    // console.log('We do cleanup here');
@@ -181,7 +182,7 @@ async function runGPTTurbo(req, res) {
    const response = await openai.createChatCompletion({
      model: `${currentModel}`,
      messages: JSON.parse(message),
-      max_tokens: 3000,
+      max_tokens: max_tokens,
      temperature
    });
    input = response.data.choices[0].message.content
@@ -190,7 +191,7 @@ async function runGPTTurbo(req, res) {
    if (error_msg.indexOf('maximum context length') >= 0) {
      input = "The output for your prompt is too long for us to process. Please reduce your prompt and try again.";
    } else {
-      console.log(e.response);
+      // console.log(e.response);
    }
  } finally {

@@ -266,7 +267,7 @@ async function runOpensource(req, res) {
    if (error_msg.indexOf('maximum context length') >= 0) {
      input = "The output for your prompt is too long for us to process. Please reduce your prompt and try again.";
    } else {
-      console.log(e.response);
+      // console.log(e.response);
    }
  } finally {