LLM context size

The number of messages in a chat can grow large. That becomes costly, and the LLM starts to forget what the original goal was.
So let's make a tail processor that trims the context down to the last few messages.

system:
@{ gpt-4.1 | tail 2 }
user: 
first message
assistant:
second message
user:
third message

tail.proc.js

module.exports = async function tail(node, args) {
  // Parse the requested tail length from the processor argument, e.g. "tail 2".
  const n = parseInt(args, 10);
  // Fall back to 20 messages when the argument is missing or invalid.
  const count = (isNaN(n) || n <= 0) ? 20 : n;
  return {
    ...node,
    exec: async (payload, ctx) => {
      // Keep only the last `count` messages, but always preserve system messages.
      const startIndex = Math.max(0, payload.messages.length - count);
      const messages = payload.messages.filter(
        (message, index) => (message.role === "system") || (index >= startIndex));

      return node.exec({
        ...payload,
        messages
      }, ctx);
    }
  };
};
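
Note the two design choices baked into the filter: messages with the "system" role are kept regardless of their position, so system instructions survive the trimming, and an invalid or missing argument falls back to a default of 20 messages.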

Here the LLM will see only the last two messages of the chat (the system message is always kept):

{
  "model": "gpt-4.1",
  "messages": [
    {
      "role": "system",
      "content": ""
    },
    {
      "role": "assistant",
      "content": "second message"
    },
    {
      "role": "user",
      "content": "third message"
    }
  ],
  "stream": true
}
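
To sanity-check the processor outside of a chat, you can wrap a stub node whose exec simply echoes the payload it receives. The stub and the test file name below are hypothetical, a minimal sketch assuming the processor lives in tail.proc.js next to it:

tail.test.js

const tail = require("./tail.proc.js");

(async () => {
  // Hypothetical stub node: its exec just returns the payload unchanged.
  const stubNode = { exec: async (payload, ctx) => payload };

  // Wrap the stub with "tail 2".
  const wrapped = await tail(stubNode, "2");

  const result = await wrapped.exec({
    messages: [
      { role: "system",    content: "" },
      { role: "user",      content: "first message" },
      { role: "assistant", content: "second message" },
      { role: "user",      content: "third message" }
    ]
  }, {});

  console.log(result.messages.map(m => m.role));
  // => [ 'system', 'assistant', 'user' ]
})();

The first user message is dropped, while the system message and the last two messages pass through to the wrapped node.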