LLM context size
The number of messages in a chat can grow large. That becomes costly, and the LLM starts forgetting what the goal was. So let's make a tail
processor that cuts the context down to the last few messages.
system:
@{ gpt-4.1 | tail 2 }
user:
first message
assistant:
second message
user:
third message
tail.proc.js
module.exports = async function tail(node, args) {
  // How many trailing messages to keep; fall back to 20 if the
  // argument is missing or not a positive number.
  const n = parseInt(args, 10);
  const count = (isNaN(n) || n <= 0) ? 20 : n;
  return {
    ...node,
    exec: async (payload, ctx) => {
      // Keep every system message plus the last `count` messages.
      const startIndex = Math.max(0, payload.messages.length - count);
      const messages = payload.messages.filter(
        (message, index) => message.role === "system" || index >= startIndex
      );
      // Pass the trimmed payload on to the wrapped node.
      return node.exec({ ...payload, messages }, ctx);
    }
  };
};
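To see what the filter keeps, here is a minimal standalone sketch of just the trimming step, with the chat above written out by hand as a plain array (no processor plumbing): every system message survives, plus the last count entries.

// Sketch of the filtering step on its own, using the chat above.
const count = 2;
const messages = [
  { role: "system", content: "" },
  { role: "user", content: "first message" },
  { role: "assistant", content: "second message" },
  { role: "user", content: "third message" },
];

const startIndex = Math.max(0, messages.length - count);
const trimmed = messages.filter(
  (message, index) => message.role === "system" || index >= startIndex
);

console.log(trimmed.map((m) => m.role)); // [ 'system', 'assistant', 'user' ]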
Here the LLM sees only the system message and the last two messages of the chat:
{
"model": "gpt-4.1",
"messages": [
{
"role": "system",
"content": ""
},
{
"role": "assistant",
"content": "second message"
},
{
"role": "user",
"content": "third message"
}
],
"stream": true
}
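If the processor argument can't be parsed as a positive number, the code above falls back to keeping the last 20 messages.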