Background:
In Q&A applications it is usually important to show users the sources that were used to generate the answer. The simplest way to do this is to have the chain return the documents that were retrieved for each generation.
Code implementation:
Loading dependencies
import bs4
from langchain import hub
from langchain_chroma import Chroma
from langchain_community.document_loaders import WebBaseLoader
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnableParallel, RunnablePassthrough
#from langchain_openai import ChatOpenAI, OpenAIEmbeddings
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.llms import Ollama
from langchain_community.embeddings import OllamaEmbeddings
Instantiating the model and embeddings
llm = Ollama(model='llama2')
embeddings = OllamaEmbeddings()
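Before building the index, it can help to check that the local model and embeddings respond. The snippet below is a minimal, optional sanity check; it assumes a local Ollama server is running and the llama2 model has already been pulled.
# Optional sanity check (assumes a running local Ollama server with llama2 pulled)
print(llm.invoke("Say hello in one short sentence."))
vec = embeddings.embed_query("hello world")
print(len(vec))  # dimensionality of the embedding vector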
Loading, chunking, and indexing the blog content
# Load, chunk, and index the blog content.
# Download the data
bs_strainer = bs4.SoupStrainer(class_=("post-content", "post-title", "post-header"))
loader = WebBaseLoader(
    web_paths=("https://lilianweng.github.io/posts/2023-06-23-agent/",),
    bs_kwargs={"parse_only": bs_strainer},
)
docs = loader.load()
Instantiating the text splitter
# Instantiate the text splitter
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
# Split the documents
splits = text_splitter.split_documents(docs)
# Load the splits into the Chroma vector database
vectorstore = Chroma.from_documents(documents=splits, embedding=embeddings)
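As a quick, optional check that the index was built, you can run a similarity search directly against the vector store. The query below is only an illustrative example.
# Optional: verify the index with a direct similarity search (illustrative query)
hits = vectorstore.similarity_search("What is task decomposition?", k=2)
for d in hits:
    print(d.metadata, d.page_content[:80])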
Building the retriever and loading the prompt
# Build the retriever
retriever = vectorstore.as_retriever()
# Load the prompt from the LangChain hub
prompt = hub.pull("rlm/rag-prompt")
#llm = ChatOpenAI(model="gpt-3.5-turbo", temperature=0)
# Format the retrieved documents into a single string
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)
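A small, hypothetical illustration of what format_docs produces; the Document contents below are made up.
# Hypothetical illustration of format_docs output
from langchain_core.documents import Document

sample = [Document(page_content="First chunk."), Document(page_content="Second chunk.")]
print(format_docs(sample))
# First chunk.
#
# Second chunk.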
Building the LCEL chain
# Build the LCEL chain
rag_chain_from_docs = (
    RunnablePassthrough.assign(context=(lambda x: format_docs(x["context"])))  # keep the input dict and replace "context" with format_docs(x["context"])
    | prompt
    | llm
    | StrOutputParser()
)
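As a hedged usage sketch, this sub-chain expects a dict with a "context" list of documents and a "question" string; the choice of splits[:2] and the question below are arbitrary examples.
# Illustrative invocation (the question and splits[:2] are arbitrary)
answer = rag_chain_from_docs.invoke(
    {"context": splits[:2], "question": "What is Task Decomposition?"}
)
print(answer)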
RunnableParallel usage example
RunnableParallel fixes the shape of the input; the example below is standalone and should be run separately.
from langchain_core.runnables import (
    RunnableLambda,
    RunnableParallel,
    RunnablePassthrough,
)

runnable = RunnableParallel(
    origin=RunnablePassthrough(),
    modified=lambda x: x + 1,
)
runnable.invoke(1)  # {'origin': 1, 'modified': 2}
Assembling the chain that returns the sources together with the answer
rag_chain_with_source = RunnableParallel(
    {"context": retriever, "question": RunnablePassthrough()}
).assign(answer=rag_chain_from_docs)  # .assign adds a new field to this runnable's dict output and returns a new runnable
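A hedged sketch of calling the full chain: the output is a dict containing the retrieved documents under "context", the original "question", and the generated "answer". The exact answer text depends on the model.
# Illustrative invocation; the answer text depends on the model
result = rag_chain_with_source.invoke("What is Task Decomposition?")
print(result["question"])
print(len(result["context"]))  # retrieved source documents
print(result["answer"])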
More about RunnableParallel
RunnableParallel is one of the two main composition primitives in LCEL, alongside RunnableSequence. It invokes Runnables concurrently, providing the same input to each of them. A RunnableParallel can be instantiated directly, or by using a dict literal within a sequence.
from langchain_core.runnables import RunnableLambda

def add_one(x: int) -> int:
    return x + 1

def mul_two(x: int) -> int:
    return x * 2

def mul_three(x: int) -> int:
    return x * 3

runnable_1 = RunnableLambda(add_one)
runnable_2 = RunnableLambda(mul_two)
runnable_3 = RunnableLambda(mul_three)

sequence = runnable_1 | {
    # this dict is coerced to a RunnableParallel
    "mul_two": runnable_2,
    "mul_three": runnable_3,
}
sequence.invoke(1)
await sequence.ainvoke(1)
sequence.batch([1, 2, 3])
await sequence.abatch([1, 2, 3])
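For reference, sequence.invoke(1) returns {'mul_two': 4, 'mul_three': 6}, sequence.batch([1, 2, 3]) returns one such dict per input, and the await variants only work in an async context such as a Jupyter notebook.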
Streaming the final output
With LCEL, the final output can be streamed:
for chunk in rag_chain_with_source.stream("What is Task Decomposition"):
    print(chunk)
Output (only part of the returned content was shown in the original post)
Aggregating the returned stream
output = {}
curr_key = None
for chunk in rag_chain_with_source.stream("What is Task Decomposition"):
    for key in chunk:
        if key not in output:
            output[key] = chunk[key]
        else:
            output[key] += chunk[key]
        if key != curr_key:
            print(f"\n\n{key}: {chunk[key]}", end="", flush=True)
        else:
            print(chunk[key], end="", flush=True)
        curr_key = key
print(output)
Output
Streaming the output of intermediate steps
from operator import itemgetter
from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain_core.tracers.log_stream import LogEntry, LogStreamCallbackHandler
Two prompt templates are built here.
# Prompt template text
contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
# Build the template
contextualize_q_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", contextualize_q_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),  # see "LangChain Getting Started 3" for details
        ("human", "{question}"),
    ]
)
contextualize_q_chain = (contextualize_q_prompt | llm | StrOutputParser()).with_config(
    tags=["contextualize_q_chain"]
)
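As a hedged sketch, the contextualization chain can be tried on its own with a small fabricated chat history; the reformulated question it returns depends on the model.
# Illustrative, standalone check of contextualize_q_chain (the history is made up)
from langchain_core.messages import AIMessage, HumanMessage

print(
    contextualize_q_chain.invoke(
        {
            "chat_history": [
                HumanMessage(content="What does LLM stand for?"),
                AIMessage(content="Large language model."),
            ],
            "question": "What is meant by large?",
        }
    )
)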
qa_system_prompt = """You are an assistant for question-answering tasks. \
Use the following pieces of retrieved context to answer the question. \
If you don't know the answer, just say that you don't know. \
Use three sentences maximum and keep the answer concise.\
{context}"""
qa_prompt = ChatPromptTemplate.from_messages(
    [
        ("system", qa_system_prompt),
        MessagesPlaceholder(variable_name="chat_history"),
        ("human", "{question}"),
    ]
)
def contextualized_question(input: dict):
    if input.get("chat_history"):
        return contextualize_q_chain
    else:
        return input["question"]
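A brief aside, as a hedged sketch of the LCEL behavior this function relies on: when a function used inside a chain returns a Runnable, that Runnable is invoked with the same input. The names below are made up for illustration.
# Hypothetical illustration: a function in a chain may return a Runnable,
# which is then invoked with the original input.
from langchain_core.runnables import RunnableLambda, RunnablePassthrough

pick = RunnableLambda(
    lambda x: RunnablePassthrough() if x.get("chat_history") else x["question"]
)
pick.invoke({"chat_history": [], "question": "hi"})        # -> 'hi'
pick.invoke({"chat_history": ["..."], "question": "hi"})   # -> the whole input dict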
# Build the LCEL chain
rag_chain = (
    RunnablePassthrough.assign(context=contextualize_q_chain | retriever | format_docs)
    | qa_prompt
    | llm
)
To stream intermediate steps, use the astream_log method. This is an async method that yields JSONPatch operations which, when applied in the order they are received, build up the RunState:
from typing import Any, Dict, List, Optional, TypedDict

class RunState(TypedDict):
    id: str
    """ID of the run."""
    streamed_output: List[Any]
    """List of output chunks streamed by Runnable.stream()"""
    final_output: Optional[Any]
    """Final output of the run, usually the result of aggregating (`+`) streamed_output.
    Only available after the run has finished successfully."""
    logs: Dict[str, LogEntry]
    """Map of run names to sub-runs. If filters were supplied, this list will
    contain only the runs that matched the filters."""
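As a hedged sketch (assuming, as in the LangChain docs, that the patches yielded by astream_log can be aggregated with +), the streamed patches can be accumulated into a final run state like this:
# Illustrative aggregation of the streamed patches into one run log.
# Run inside an async context (e.g. a notebook).
final_state = None
async for patch in rag_chain.astream_log(
    {"question": "What is Task Decomposition?", "chat_history": []}
):
    final_state = patch if final_state is None else final_state + patch
print(final_state.state["final_output"])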
You can stream all steps (the default), or include/exclude steps by name, tag, or metadata. In this case, only the intermediate steps that are part of contextualize_q_chain, plus the final output, will be streamed.
# needed when running async code in a Jupyter notebook
import nest_asyncio
nest_asyncio.apply()
from langchain_core.messages import HumanMessage
# empty chat history
chat_history = []
# the question
question = "What is Task Decomposition?"
# invoke the LCEL chain with the question and the chat history
ai_msg = rag_chain.invoke({"question": question, "chat_history": chat_history})
# append the question and the returned answer to the chat history
chat_history.extend([HumanMessage(content=question), ai_msg])
# the second question
second_question = "What are common ways of doing it?"
ct = 0
# keep only the output of contextualize_q_chain
async for jsonpatch_op in rag_chain.astream_log(
    {"question": second_question, "chat_history": chat_history},
    include_tags=["contextualize_q_chain"],
):
    print(jsonpatch_op)
    print("\n" + "-" * 30 + "\n")
    ct += 1
    if ct > 20:
        break
Partial sample of the output
If we want to get the retrieved documents, we can filter by the name "Retriever":
ct = 0
async for jsonpatch_op in rag_chain.astream_log(
    {"question": second_question, "chat_history": chat_history},
    include_names=["Retriever"],
    with_streamed_output_list=False,
):
    print(jsonpatch_op)
    print("\n" + "-" * 30 + "\n")
    ct += 1
    if ct > 20:
        break
Partial sample of the returned output
That's all for this article. Thanks for reading.