HypotheticalDocumentEmbedder Class — langchain Architecture
Architecture documentation for the HypotheticalDocumentEmbedder class in base.py from the langchain codebase.
Entity Profile
Dependency Diagram
graph TD
    879fde74_4b2e_77b6_2f39_1c63a5d18d30["HypotheticalDocumentEmbedder"]
    097a4781_5519_0b5d_6244_98c64eadc0d6["Chain"]
    879fde74_4b2e_77b6_2f39_1c63a5d18d30 -->|extends| 097a4781_5519_0b5d_6244_98c64eadc0d6
    c58e6864_9429_b081_883b_39ba15df0485["Embeddings"]
    879fde74_4b2e_77b6_2f39_1c63a5d18d30 -->|extends| c58e6864_9429_b081_883b_39ba15df0485
    8d3a235d_a08f_2979_f52a_1772067dd1d3["LLMChain"]
    879fde74_4b2e_77b6_2f39_1c63a5d18d30 -->|uses| 8d3a235d_a08f_2979_f52a_1772067dd1d3
    4b3d5656_35af_69ae_140b_cdc033a089a4["base.py"]
    879fde74_4b2e_77b6_2f39_1c63a5d18d30 -->|defined in| 4b3d5656_35af_69ae_140b_cdc033a089a4
    301d011e_e07f_d7fd_c95a_3c586a96e4dd["input_keys()"]
    879fde74_4b2e_77b6_2f39_1c63a5d18d30 -->|method| 301d011e_e07f_d7fd_c95a_3c586a96e4dd
    ef84ab36_de8d_bf8e_2534_359392bc8576["output_keys()"]
    879fde74_4b2e_77b6_2f39_1c63a5d18d30 -->|method| ef84ab36_de8d_bf8e_2534_359392bc8576
    bcb1e62b_d8eb_b3d4_8e67_2ec700fb9325["embed_documents()"]
    879fde74_4b2e_77b6_2f39_1c63a5d18d30 -->|method| bcb1e62b_d8eb_b3d4_8e67_2ec700fb9325
    7c685c78_06d1_94dd_2d5d_8842a0cc2764["combine_embeddings()"]
    879fde74_4b2e_77b6_2f39_1c63a5d18d30 -->|method| 7c685c78_06d1_94dd_2d5d_8842a0cc2764
    a972acb2_0084_1e19_c9d7_f16c806a4a37["embed_query()"]
    879fde74_4b2e_77b6_2f39_1c63a5d18d30 -->|method| a972acb2_0084_1e19_c9d7_f16c806a4a37
    425ec347_7672_169e_02e3_f55fae196d37["_call()"]
    879fde74_4b2e_77b6_2f39_1c63a5d18d30 -->|method| 425ec347_7672_169e_02e3_f55fae196d37
    fe0bc8b7_4c80_7f6d_cd0c_37381864adeb["from_llm()"]
    879fde74_4b2e_77b6_2f39_1c63a5d18d30 -->|method| fe0bc8b7_4c80_7f6d_cd0c_37381864adeb
    daaa8051_baac_de83_db46_497dccd06425["_chain_type()"]
    879fde74_4b2e_77b6_2f39_1c63a5d18d30 -->|method| daaa8051_baac_de83_db46_497dccd06425
Relationship Graph
Source Code
libs/langchain/langchain_classic/chains/hyde/base.py lines 26–127
class HypotheticalDocumentEmbedder(Chain, Embeddings):
"""Generate hypothetical document for query, and then embed that.
Based on https://arxiv.org/abs/2212.10496
"""
base_embeddings: Embeddings
llm_chain: Runnable
model_config = ConfigDict(
arbitrary_types_allowed=True,
extra="forbid",
)
@property
def input_keys(self) -> list[str]:
    """Return the input names that Hyde's LLM chain requires.

    The names are taken from the ``"required"`` entry of the JSON schema
    produced by ``llm_chain.input_schema``.
    """
    schema = self.llm_chain.input_schema.model_json_schema()
    return schema["required"]
@property
def output_keys(self) -> list[str]:
    """Return the output keys exposed by Hyde's LLM chain.

    An ``LLMChain`` reports its own output keys; for any other chain object
    the single key ``"text"`` is returned.
    """
    chain = self.llm_chain
    return chain.output_keys if isinstance(chain, LLMChain) else ["text"]
def embed_documents(self, texts: list[str]) -> list[list[float]]:
    """Embed ``texts`` by delegating to the wrapped base embeddings."""
    embedder = self.base_embeddings
    return embedder.embed_documents(texts)
def combine_embeddings(self, embeddings: list[list[float]]) -> list[float]:
    """Combine embeddings into one vector by averaging each dimension.

    Args:
        embeddings: The vectors to average. In the pure-Python fallback,
            ``zip(..., strict=False)`` stops at the shortest vector.

    Returns:
        The per-dimension mean as a list of built-in floats, or an empty
        list when ``embeddings`` is empty.
    """
    # Decide the empty case before picking a backend so both paths agree:
    # NumPy's mean over an empty array is a scalar NaN, which cannot be
    # converted to a list.
    if not embeddings:
        return []
    try:
        # Only the import is guarded; errors from the computation itself
        # must not be mistaken for a missing dependency.
        import numpy as np
    except ImportError:
        logger.warning(
            "NumPy not found in the current Python environment. "
            "HypotheticalDocumentEmbedder will use a pure Python implementation "
            "for internal calculations, which may significantly impact "
            "performance, especially for large datasets. For optimal speed and "
            "efficiency, consider installing NumPy: pip install numpy",
        )
        num_vectors = len(embeddings)
        return [
            sum(dim_values) / num_vectors
            for dim_values in zip(*embeddings, strict=False)
        ]
    # ``tolist`` yields built-in floats rather than NumPy scalar objects.
    return np.array(embeddings).mean(axis=0).tolist()
def embed_query(self, text: str) -> list[float]:
    """Generate a hypothetical document for ``text`` and embed it.

    The query is passed to the LLM chain under its first input key, the
    generated document is embedded via ``embed_documents``, and the
    resulting vectors are reduced with ``combine_embeddings``.
    """
    first_input = self.input_keys[0]
    generated = self.llm_chain.invoke({first_input: text})
    # For an LLMChain the result is indexed by the first output key; for
    # any other chain the result itself is used as the document.
    document = (
        generated[self.output_keys[0]]
        if isinstance(self.llm_chain, LLMChain)
        else generated
    )
    vectors = self.embed_documents([document])
    return self.combine_embeddings(vectors)
def _call(
    self,
    inputs: dict[str, Any],
    run_manager: CallbackManagerForChainRun | None = None,
) -> dict[str, str]:
    """Invoke the internal LLM chain with ``inputs``.

    Child callbacks of ``run_manager`` are passed to the chain; when no run
    manager is supplied, a no-op manager is used in its place.
    """
    manager = (
        run_manager
        if run_manager
        else CallbackManagerForChainRun.get_noop_manager()
    )
    config = {"callbacks": manager.get_child()}
    return self.llm_chain.invoke(inputs, config=config)
@classmethod
def from_llm(
cls,
llm: BaseLanguageModel,
base_embeddings: Embeddings,
prompt_key: str | None = None,
Extends
Source
Frequently Asked Questions
What is the HypotheticalDocumentEmbedder class?
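HypotheticalDocumentEmbedder is a class in the langchain codebase, defined in libs/langchain/langchain_classic/chains/hyde/base.py. It runs an LLM chain on a query to generate a hypothetical document and then embeds that document with its base embeddings (the approach described in https://arxiv.org/abs/2212.10496).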
Where is HypotheticalDocumentEmbedder defined?
HypotheticalDocumentEmbedder is defined in libs/langchain/langchain_classic/chains/hyde/base.py at line 26.
What does HypotheticalDocumentEmbedder extend?
HypotheticalDocumentEmbedder extends Chain and Embeddings. It does not extend LLMChain; LLMChain is only referenced in isinstance checks on the llm_chain attribute.
Analyze Your Own Codebase
Get architecture documentation, dependency graphs, and domain analysis for your codebase in minutes.
Try Supermodel Free