c8b81840 by Jerald Lim

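maxtoken: read max_tokens from the MAX_TOKENS_chatbot_plus environment variable (default 512) instead of the hard-coded 3000, and comment out the console.log(e.response) error logging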

1 parent 417e15e7
...@@ -13,4 +13,6 @@ OPENSOURCE_MODELS="openchat_3.5,zephyr-7B-beta" ...@@ -13,4 +13,6 @@ OPENSOURCE_MODELS="openchat_3.5,zephyr-7B-beta"
13 OPENSOURCE_ENDPOINTS={"openchat_3.5": "http://192.168.22.180:5000/v1", "zephyr-7B-beta": "http://192.168.22.180:5000/v1"} 13 OPENSOURCE_ENDPOINTS={"openchat_3.5": "http://192.168.22.180:5000/v1", "zephyr-7B-beta": "http://192.168.22.180:5000/v1"}
14 OPENSOURCE_API_KEY={"openchat_3.5": "Q8QZG0HhS62piZSmvSR7CP3E53fY8J99Ad2wmJLdiW05cpjnnBAH5xlQlClYdqeh", "zephyr-7B-beta": "Q8QZG0HhS62piZSmvSR7CP3E53fY8J99Ad2wmJLdiW05cpjnnBAH5xlQlClYdqeh"} 14 OPENSOURCE_API_KEY={"openchat_3.5": "Q8QZG0HhS62piZSmvSR7CP3E53fY8J99Ad2wmJLdiW05cpjnnBAH5xlQlClYdqeh", "zephyr-7B-beta": "Q8QZG0HhS62piZSmvSR7CP3E53fY8J99Ad2wmJLdiW05cpjnnBAH5xlQlClYdqeh"}
15 15
16 REACT_APP_START_SERVER_URL= https://dev.start.ai-pro.org
...\ No newline at end of file ...\ No newline at end of file
16 REACT_APP_START_SERVER_URL= https://dev.start.ai-pro.org
17
18 MAX_TOKENS_chatbot_plus=100
...\ No newline at end of file ...\ No newline at end of file
......
...@@ -69,6 +69,7 @@ app.use(cors()) ...@@ -69,6 +69,7 @@ app.use(cors())
69 app.use(require('morgan')('dev')) 69 app.use(require('morgan')('dev'))
70 app.use(rateLimiter) 70 app.use(rateLimiter)
71 71
72 const max_tokens = process.env.MAX_TOKENS_chatbot_plus ? parseInt(process.env.MAX_TOKENS_chatbot_plus) : 512;
72 // Routing 73 // Routing
73 74
74 // Primary Open AI Route 75 // Primary Open AI Route
...@@ -119,7 +120,7 @@ app.post('/api', async (req, res) => { ...@@ -119,7 +120,7 @@ app.post('/api', async (req, res) => {
119 const response = await openai.createCompletion({ 120 const response = await openai.createCompletion({
120 model: `${currentModel}`,// "text-davinci-003", 121 model: `${currentModel}`,// "text-davinci-003",
121 prompt: query_prompt, 122 prompt: query_prompt,
122 max_tokens: 3000, 123 max_tokens: max_tokens,
123 temperature, 124 temperature,
124 }); 125 });
125 let input = response.data.choices[0].text; 126 let input = response.data.choices[0].text;
...@@ -152,7 +153,7 @@ app.post('/api', async (req, res) => { ...@@ -152,7 +153,7 @@ app.post('/api', async (req, res) => {
152 message: "The output for your prompt is too long for us to process. Please reduce your prompt and try again.", 153 message: "The output for your prompt is too long for us to process. Please reduce your prompt and try again.",
153 }) 154 })
154 } else { 155 } else {
155 console.log(e.response); 156 // console.log(e.response);
156 } 157 }
157 } finally { 158 } finally {
158 // console.log('We do cleanup here'); 159 // console.log('We do cleanup here');
...@@ -181,7 +182,7 @@ async function runGPTTurbo(req, res) { ...@@ -181,7 +182,7 @@ async function runGPTTurbo(req, res) {
181 const response = await openai.createChatCompletion({ 182 const response = await openai.createChatCompletion({
182 model: `${currentModel}`, 183 model: `${currentModel}`,
183 messages: JSON.parse(message), 184 messages: JSON.parse(message),
184 max_tokens: 3000, 185 max_tokens: max_tokens,
185 temperature 186 temperature
186 }); 187 });
187 input = response.data.choices[0].message.content 188 input = response.data.choices[0].message.content
...@@ -190,7 +191,7 @@ async function runGPTTurbo(req, res) { ...@@ -190,7 +191,7 @@ async function runGPTTurbo(req, res) {
190 if (error_msg.indexOf('maximum context length') >= 0) { 191 if (error_msg.indexOf('maximum context length') >= 0) {
191 input = "The output for your prompt is too long for us to process. Please reduce your prompt and try again."; 192 input = "The output for your prompt is too long for us to process. Please reduce your prompt and try again.";
192 } else { 193 } else {
193 console.log(e.response); 194 // console.log(e.response);
194 } 195 }
195 } finally { 196 } finally {
196 197
...@@ -266,7 +267,7 @@ async function runOpensource(req, res) { ...@@ -266,7 +267,7 @@ async function runOpensource(req, res) {
266 if (error_msg.indexOf('maximum context length') >= 0) { 267 if (error_msg.indexOf('maximum context length') >= 0) {
267 input = "The output for your prompt is too long for us to process. Please reduce your prompt and try again."; 268 input = "The output for your prompt is too long for us to process. Please reduce your prompt and try again.";
268 } else { 269 } else {
269 console.log(e.response); 270 // console.log(e.response);
270 } 271 }
271 } finally { 272 } finally {
272 273
......
Styling with Markdown is supported
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!