1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
class MultiSourceFusion:
    """Fuse retrieval results coming from several knowledge sources.

    The pipeline is: normalize every per-source hit into one record shape,
    compute a ``final_score`` for each record, deduplicate, then return the
    highest-scoring records.

    NOTE(review): ``_assess_quality``, ``_check_consistency`` and
    ``_deduplicate_by_content`` are called below but are not defined in the
    visible part of this class; they must be provided elsewhere.
    """

    # Multiplier applied to an item's original score, keyed by source name.
    SOURCE_WEIGHTS = {
        'knowledge_graph': 1.2,
        'vector_store': 1.0,
        'sql_database': 1.1,
        'web_search': 0.8,
    }
    # Multiplier used for any source name missing from SOURCE_WEIGHTS.
    DEFAULT_SOURCE_WEIGHT = 1.0

    def __init__(self, sources: Dict[str, Any]):
        """Store the mapping of configured knowledge sources.

        Args:
            sources: Mapping of source name to source object. It is only
                stored on the instance; the methods visible here do not
                read it.
        """
        self.sources = sources

    def fuse(
        self,
        query: str,
        source_results: Dict[str, List],
        top_k: int = 20,
    ) -> List[dict]:
        """Fuse the retrieval results of several knowledge sources.

        Args:
            query: The user query; forwarded to the scoring step.
            source_results: Mapping of source name to that source's list of
                result dicts.
            top_k: Maximum number of fused records to return (default 20,
                the previously hard-coded limit).

        Returns:
            At most ``top_k`` records sorted by ``final_score``, highest
            first.

        Raises:
            ValueError: If ``top_k`` is negative.
        """
        if top_k < 0:
            # A negative slice bound would silently drop records from the
            # tail instead of limiting the result size.
            raise ValueError("top_k must be non-negative")

        normalized = self._normalize(source_results)
        if not normalized:
            # Nothing was retrieved: skip scoring and deduplication.
            return []

        scored = self._cross_source_scoring(query, normalized)
        deduplicated = self._deduplicate_by_content(scored)
        ranked = sorted(
            deduplicated,
            key=lambda record: record['final_score'],
            reverse=True,
        )
        return ranked[:top_k]

    def _normalize(self, source_results: Dict[str, List]) -> List[dict]:
        """Convert results from different sources into one record format.

        Each returned record has the keys ``source``, ``content``,
        ``metadata``, ``original_score`` and ``embedding``. Missing fields
        default to ``''``, ``{}``, ``0`` and ``None`` respectively.

        Args:
            source_results: Mapping of source name to a list of result
                dicts. A source whose value is ``None`` or empty
                contributes no records.

        Returns:
            A flat list of normalized records, in source iteration order.
        """
        normalized = []
        for source_name, results in source_results.items():
            # A source that returned None is treated as having no results.
            for item in results or ():
                score = item.get('score')
                normalized.append({
                    'source': source_name,
                    'content': item.get('content', ''),
                    'metadata': item.get('metadata', {}),
                    # An explicit ``None`` score would break the arithmetic
                    # in the scoring step, so treat it like a missing score.
                    'original_score': 0 if score is None else score,
                    'embedding': item.get('embedding', None),
                })
        return normalized

    def _cross_source_scoring(self, query: str, items: List[dict]) -> List[dict]:
        """Attach a ``final_score`` to every record.

        The score is ``original_score * source_weight * 0.4
        + quality_score * 0.3 + consistency_score * 0.3``.

        Args:
            query: The user query. It is accepted for interface symmetry;
                the formula visible here does not use it.
            items: Normalized records. They are updated in place.

        Returns:
            The same ``items`` list, each record now carrying
            ``final_score``.
        """
        for item in items:
            base_score = item['original_score']
            source_weight = self._get_source_weight(item['source'])
            quality_score = self._assess_quality(item['content'])
            # The whole list is passed so the helper can compare this
            # record against the others.
            consistency_score = self._check_consistency(item, items)
            item['final_score'] = (
                base_score * source_weight * 0.4
                + quality_score * 0.3
                + consistency_score * 0.3
            )
        return items

    def _get_source_weight(self, source: str) -> float:
        """Return the configured weight for a source name.

        Args:
            source: Source name as used in the ``source_results`` keys.

        Returns:
            The entry from ``SOURCE_WEIGHTS``, or ``DEFAULT_SOURCE_WEIGHT``
            (1.0) for an unknown source.
        """
        return self.SOURCE_WEIGHTS.get(source, self.DEFAULT_SOURCE_WEIGHT)
|
💬 互动讨论
欢迎留下你的见解、疑问或心得,精选评论有机会获得积分奖励哦!
使用 GitHub 账号登录评论 · 了解 Utterances
发现错误或有建议?提交反馈