Skip to main content

LangChain + MEMANTO

Add persistent, cross-session memory to your LangChain agents and chains using MEMANTO. LangChain's built-in memory classes reset between runs. MEMANTO plugs in as a custom memory backend that stores and retrieves context semantically, so your chains remember what matters — even days later.

How It Works

LangChain Chain / Agent -> MemAntoMemory -> MEMANTO Server -> Moorcheh.ai
You drop MemAntoMemory in wherever LangChain expects a BaseMemory. It handles session activation, storing new messages, and injecting recalled context into your prompts.

Prerequisites

Install

pip install memanto langchain langchain-openai httpx

Step 1: Start MEMANTO Server

memanto serve

Step 2: Create the Memory Class

Create memanto_memory.py:
import os
import httpx
from langchain.memory import BaseMemory

class MemAntoMemory(BaseMemory):
    """LangChain-compatible memory backend powered by MEMANTO.

    Stores each conversation turn on a MEMANTO server and recalls
    semantically relevant memories before every LLM call, so context
    persists across processes for the same ``agent_id``.
    """

    # Pydantic-style field defaults (BaseMemory is a pydantic model).
    agent_id: str = "langchain-agent"
    memanto_url: str = "http://localhost:8000"
    memory_key: str = "memory"
    session_token: str = ""

    def __init__(self, **kwargs):
        super().__init__(**kwargs)
        # Raises KeyError immediately if the API key is not configured.
        self._api_key = os.environ["MOORCHEH_API_KEY"]
        self._client = httpx.Client()
        self._activate()

    def _activate(self) -> None:
        """Open a MEMANTO session and cache its token for later requests."""
        response = self._client.post(
            f"{self.memanto_url}/api/v2/agents/{self.agent_id}/activate",
            headers={"Authorization": f"Bearer {self._api_key}"},
        )
        response.raise_for_status()
        self.session_token = response.json()["session_token"]

    @property
    def _headers(self) -> dict:
        """Auth headers carrying both the API key and the session token."""
        return {
            "Authorization": f"Bearer {self._api_key}",
            "X-Session-Token": self.session_token,
        }

    @property
    def memory_variables(self) -> list[str]:
        """Names of the variables this memory injects into the prompt."""
        return [self.memory_key]

    def load_memory_variables(self, inputs: dict) -> dict:
        """Called before each LLM call - recalls relevant memories."""
        query = inputs.get("input", inputs.get("human_input", ""))
        if not query:
            return {self.memory_key: ""}

        response = self._client.get(
            f"{self.memanto_url}/api/v2/agents/{self.agent_id}/recall",
            params={"query": query, "limit": 5},
            headers=self._headers,
        )
        response.raise_for_status()
        memories = response.json().get("memories", [])
        if not memories:
            return {self.memory_key: ""}

        # NOTE: these must be "\n" escapes, not literal line breaks inside
        # the string literal (which is a SyntaxError).
        context = "\n".join(f"- {m['content']}" for m in memories)
        return {self.memory_key: f"Relevant memory:\n{context}"}

    def save_context(self, inputs: dict, outputs: dict) -> None:
        """Called after each LLM call - stores the conversation turn."""
        human = inputs.get("input", inputs.get("human_input", ""))
        ai = outputs.get("output", outputs.get("response", ""))

        if human:
            self._client.post(
                f"{self.memanto_url}/api/v2/agents/{self.agent_id}/remember",
                params={"memory_type": "fact", "content": f"User said: {human}"},
                headers=self._headers,
            )
        if ai:
            self._client.post(
                f"{self.memanto_url}/api/v2/agents/{self.agent_id}/remember",
                params={"memory_type": "fact", "content": f"Assistant replied: {ai}"},
                headers=self._headers,
            )

    def clear(self) -> None:
        pass  # Memories persist in MEMANTO - clear via CLI if needed

Step 3: Use in a Chain

Create agent.py:
import os
from langchain_openai import ChatOpenAI
from langchain.chains import ConversationChain
from langchain.prompts import PromptTemplate
from memanto_memory import MemAntoMemory

llm = ChatOpenAI(model="gpt-4o-mini", temperature=0)
memory = MemAntoMemory(agent_id="my-assistant")

# Build a prompt that uses recalled memory as context.
# The newlines are "\n" escapes inside the string literals — literal line
# breaks here would be a SyntaxError.
prompt = PromptTemplate(
    input_variables=["memory", "input"],
    template=(
        "You are a helpful assistant with long-term memory.\n\n"
        "{memory}\n\n"
        "Human: {input}\n"
        "Assistant:"
    )
)

chain = ConversationChain(llm=llm, memory=memory, prompt=prompt, verbose=True)

# First run - Alice introduces herself
response = chain.invoke({"input": "My name is Alice and I prefer dark mode."})
print(response["output"])

# Second run - MEMANTO recalls that Alice prefers dark mode
response = chain.invoke({"input": "What UI settings should I use?"})
print(response["output"])

Step 4: Run

export MOORCHEH_API_KEY=mk_your_api_key
export OPENAI_API_KEY=sk_your_openai_key
python agent.py

Using with LCEL (LangChain Expression Language)

Inject recalled memory directly into an LCEL pipeline:
import os, httpx
from langchain_openai import ChatOpenAI
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnableLambda

# Raises KeyError up front if the key is missing from the environment.
API_KEY = os.environ["MOORCHEH_API_KEY"]
AGENT_ID = "lcel-agent"
BASE_URL = "http://localhost:8000/api/v2"

# Activate a MEMANTO session at import time; the token authorizes all
# subsequent recall/remember calls for this agent.
token = httpx.post(
    f"{BASE_URL}/agents/{AGENT_ID}/activate",
    headers={"Authorization": f"Bearer {API_KEY}"}
).json()["session_token"]

HEADERS = {"Authorization": f"Bearer {API_KEY}", "X-Session-Token": token}

def recall_context(inputs: dict) -> dict:
    """Fetch up to 5 relevant memories and attach them as ``context``.

    Returns a copy of *inputs* extended with a ``context`` key so the
    downstream prompt template can interpolate it.
    """
    resp = httpx.get(
        f"{BASE_URL}/agents/{AGENT_ID}/recall",
        params={"query": inputs["question"], "limit": 5},
        headers=HEADERS
    )
    memories = resp.json().get("memories", [])
    # Placeholder keeps the prompt well-formed when nothing is stored yet.
    context = "\n".join(f"- {m['content']}" for m in memories) or "No prior context."
    return {**inputs, "context": context}

# System message newlines are "\n" escapes — literal line breaks inside
# the string literal would be a SyntaxError.
prompt = ChatPromptTemplate.from_messages([
    ("system", "You are a helpful assistant.\n\nMemory:\n{context}"),
    ("human", "{question}")
])

# Memory recall runs first, then the prompt is filled and sent to the LLM.
chain = RunnableLambda(recall_context) | prompt | ChatOpenAI(model="gpt-4o-mini")

result = chain.invoke({"question": "What are my UI preferences?"})
print(result.content)

Using MEMANTO’s Built-in Answer (Optional)

For cases where you want a direct, grounded response from memory without routing through your chain, MEMANTO exposes an answer endpoint that uses its native RAG model. No external LLM call is made on your side. This is useful as a quick lookup tool — for example, answering a simple factual question about a user before deciding whether to invoke the full chain.
import os, httpx

# Raises KeyError up front if the key is missing from the environment.
API_KEY = os.environ["MOORCHEH_API_KEY"]
AGENT_ID = "my-assistant"
BASE_URL = "http://localhost:8000/api/v2"

# Reuse the session token from MemAntoMemory or activate a fresh one
token = httpx.post(
    f"{BASE_URL}/agents/{AGENT_ID}/activate",
    headers={"Authorization": f"Bearer {API_KEY}"}
).json()["session_token"]

HEADERS = {"Authorization": f"Bearer {API_KEY}", "X-Session-Token": token}

def memanto_answer(question: str) -> str:
    """Get a synthesized answer from stored memories using MEMANTO's native RAG."""
    resp = httpx.post(
        f"{BASE_URL}/agents/{AGENT_ID}/answer",
        params={"question": question},
        headers=HEADERS,
    )
    resp.raise_for_status()
    payload = resp.json()
    # Empty string when the server returns no "answer" field.
    return payload.get("answer", "")

# Direct memory answer — no OpenAI call needed
answer = memanto_answer("What UI preferences does Alice have?")
print(answer)
# -> "Alice prefers dark mode and concise responses."
You can also use this inside an LCEL chain as a conditional step — call memanto_answer first, and only invoke the full LLM if the memory answer is empty:
from langchain_core.runnables import RunnableLambda

def answer_or_recall(inputs: dict) -> dict:
    """Try MEMANTO's native RAG answer first; fall back to raw memory recall.

    Returns a copy of *inputs* extended with a ``context`` key for the
    downstream prompt.
    """
    quick = memanto_answer(inputs["question"])
    if quick:
        return {**inputs, "context": f"Memory answer: {quick}"}
    # Fall back to raw recall
    resp = httpx.get(
        f"{BASE_URL}/agents/{AGENT_ID}/recall",
        params={"query": inputs["question"], "limit": 5},
        headers=HEADERS
    )
    memories = resp.json().get("memories", [])
    # "\n" escape (not a literal line break) joins the bullet list;
    # placeholder keeps the prompt well-formed when nothing is stored.
    context = "\n".join(f"- {m['content']}" for m in memories) or "No prior context."
    return {**inputs, "context": context}
When to use answer vs recall
  • Use recall (via load_memory_variables) when your LLM should reason over the raw memories itself.
  • Use answer when you want a ready-made response from memory, or to short-circuit the chain for simple factual lookups.

Persistent Memory Across Sessions

Memories stored via save_context survive process restarts and are available in future sessions for the same agent_id:
# View stored memories
memanto recall "all context" --agent my-assistant

# Export to file
memanto memory export --agent my-assistant

Next Steps