StuffDocumentsChain Class — langchain Architecture

Architecture documentation for the StuffDocumentsChain class in stuff.py from the langchain codebase.

Class python

Entity Profile

Dependency Diagram

graph TD
  2d0480d1_fbf6_c0df_d443_92c8437d1fff["StuffDocumentsChain"]
  2f364d76_a69d_403d_0a63_04792fe626bb["BaseCombineDocumentsChain"]
  2d0480d1_fbf6_c0df_d443_92c8437d1fff -->|extends| 2f364d76_a69d_403d_0a63_04792fe626bb
  b665d6cc_6c91_59f8_2e5b_85b1b35bfe07["stuff.py"]
  2d0480d1_fbf6_c0df_d443_92c8437d1fff -->|defined in| b665d6cc_6c91_59f8_2e5b_85b1b35bfe07
  18de91de_6b8c_3fcf_904f_461c83f5c64d["get_default_document_variable_name()"]
  2d0480d1_fbf6_c0df_d443_92c8437d1fff -->|method| 18de91de_6b8c_3fcf_904f_461c83f5c64d
  8ae97478_71e8_9077_d2bb_45bdcb4af8e6["input_keys()"]
  2d0480d1_fbf6_c0df_d443_92c8437d1fff -->|method| 8ae97478_71e8_9077_d2bb_45bdcb4af8e6
  e7742348_cc60_c503_d46b_e83cb5c9de9a["_get_inputs()"]
  2d0480d1_fbf6_c0df_d443_92c8437d1fff -->|method| e7742348_cc60_c503_d46b_e83cb5c9de9a
  59bc2403_9bf0_8dbd_e707_e0536608da83["prompt_length()"]
  2d0480d1_fbf6_c0df_d443_92c8437d1fff -->|method| 59bc2403_9bf0_8dbd_e707_e0536608da83
  dae29ee3_4a9d_1ba1_f5e1_6b7cfa261600["combine_docs()"]
  2d0480d1_fbf6_c0df_d443_92c8437d1fff -->|method| dae29ee3_4a9d_1ba1_f5e1_6b7cfa261600
  ee1c7e93_6132_f120_3775_2b63f3575677["acombine_docs()"]
  2d0480d1_fbf6_c0df_d443_92c8437d1fff -->|method| ee1c7e93_6132_f120_3775_2b63f3575677
  2c625466_ec02_c89c_d60a_9708ccd2ec09["_chain_type()"]
  2d0480d1_fbf6_c0df_d443_92c8437d1fff -->|method| 2c625466_ec02_c89c_d60a_9708ccd2ec09

Relationship Graph

Source Code

libs/langchain/langchain_classic/chains/combine_documents/stuff.py lines 113–291

class StuffDocumentsChain(BaseCombineDocumentsChain):
    """Chain that combines documents by stuffing into context.

    This chain takes a list of documents and first combines them into a single string.
    It does this by formatting each document into a string with the `document_prompt`
    and then joining them together with `document_separator`. It then adds that new
    string to the inputs with the variable name set by `document_variable_name`.
    Those inputs are then passed to the `llm_chain`.

    Example:
        ```python
        from langchain_classic.chains import StuffDocumentsChain, LLMChain
        from langchain_core.prompts import PromptTemplate
        from langchain_openai import OpenAI

        # This controls how each document will be formatted. Specifically,
        # it will be passed to `format_document` - see that function for more
        # details.
        document_prompt = PromptTemplate(
            input_variables=["page_content"], template="{page_content}"
        )
        document_variable_name = "context"
        model = OpenAI()
        # The prompt here should take as an input variable the
        # `document_variable_name`
        prompt = PromptTemplate.from_template("Summarize this content: {context}")
        llm_chain = LLMChain(llm=model, prompt=prompt)
        chain = StuffDocumentsChain(
            llm_chain=llm_chain,
            document_prompt=document_prompt,
            document_variable_name=document_variable_name,
        )
        ```
    """

    llm_chain: LLMChain
    """LLM chain which is called with the formatted document string,
    along with any other inputs."""
    # The factory hands back the DEFAULT_DOCUMENT_PROMPT object itself (defined
    # outside this excerpt), not a copy of it.
    document_prompt: BasePromptTemplate = Field(
        default_factory=lambda: DEFAULT_DOCUMENT_PROMPT,
    )
    """Prompt to use to format each document, gets passed to `format_document`."""
    # Declared without a default: the "before" validator
    # `get_default_document_variable_name` fills it in when the llm_chain prompt
    # has exactly one input variable, and raises ValueError otherwise.
    document_variable_name: str
    """The variable name in the llm_chain to put the documents in.
    If only one variable in the llm_chain, this need not be provided."""
    document_separator: str = "\n\n"
    """The string with which to join the formatted documents"""

    # NOTE(review): assuming ConfigDict is pydantic's — `arbitrary_types_allowed`
    # permits field types pydantic cannot validate natively, and `extra="forbid"`
    # rejects constructor arguments that are not declared fields.
    model_config = ConfigDict(
        arbitrary_types_allowed=True,
        extra="forbid",
    )

    @model_validator(mode="before")
    @classmethod
    def get_default_document_variable_name(cls, values: dict) -> Any:
        """Resolve or check `document_variable_name` against the llm_chain prompt.

        When `document_variable_name` is absent from `values` and the prompt of
        `values["llm_chain"]` declares exactly one input variable, that variable
        is stored under `document_variable_name`. When a name is supplied, it
        must be one of the prompt's input variables.

        Args:
            values: Raw field values; `values["llm_chain"]` is read
                unconditionally.

        Returns:
            The same `values` mapping that was passed in.

        Raises:
            ValueError: If the name is absent and the prompt does not declare
                exactly one input variable, or if the supplied name is not
                among the prompt's input variables.
        """
        llm_chain_variables = values["llm_chain"].prompt.input_variables

        if "document_variable_name" in values:
            # An explicitly chosen name has to exist in the prompt.
            if values["document_variable_name"] in llm_chain_variables:
                return values
            msg = (
                f"document_variable_name {values['document_variable_name']} was "
                f"not found in llm_chain input_variables: {llm_chain_variables}"
            )
            raise ValueError(msg)

        # No name given: it can only be inferred from a single-variable prompt.
        if len(llm_chain_variables) != 1:
            msg = (
                "document_variable_name must be provided if there are "
                "multiple llm_chain_variables"
            )
            raise ValueError(msg)

        values["document_variable_name"] = llm_chain_variables[0]
        return values

    @property