Source code for lightning_ir.data.data

 1from dataclasses import dataclass
 2from typing import Any, Dict, List, Sequence
 3
 4import torch
 5
 6
[docs] 7@dataclass 8class RankSample: 9 query_id: str 10 query: str 11 doc_ids: Sequence[str] 12 docs: Sequence[str] 13 targets: torch.Tensor | None = None 14 qrels: List[Dict[str, Any]] | None = None
15 16
@dataclass
class QuerySample:
    """A query identifier paired with the query text."""

    # Identifier of the query.
    query_id: str
    # Text of the query.
    query: str

    @classmethod
    def from_ir_dataset_sample(cls, sample):
        """Build an instance from an indexable ``sample``.

        Item ``0`` of ``sample`` becomes ``query_id`` and item ``1`` becomes
        ``query``; any further items are ignored.
        """
        query_id = sample[0]
        query = sample[1]
        return cls(query_id, query)
25 26
@dataclass
class DocSample:
    """A document identifier paired with the document text."""

    # Identifier of the document.
    doc_id: str
    # Text of the document.
    doc: str

    @classmethod
    def from_ir_dataset_sample(cls, sample):
        """Build an instance from ``sample``.

        Item ``0`` of ``sample`` becomes ``doc_id``; the value returned by
        ``sample.default_text()`` becomes ``doc``.
        """
        doc_id = sample[0]
        text = sample.default_text()
        return cls(doc_id, text)
35 36
[docs] 37@dataclass 38class RankBatch: 39 queries: Sequence[str] 40 docs: Sequence[Sequence[str]] 41 query_ids: Sequence[str] | None = None 42 doc_ids: Sequence[Sequence[str]] | None = None 43 qrels: List[Dict[str, int]] | None = None
44 45
@dataclass
class TrainBatch(RankBatch):
    """A ``RankBatch`` extended with an optional ``targets`` tensor."""

    # Optional tensor of targets; shape and dtype are not fixed by this class.
    targets: torch.Tensor | None = None
49 50
@dataclass
class IndexBatch:
    """A batch of document identifiers together with document texts."""

    # Document identifiers.
    doc_ids: Sequence[str]
    # Document texts; presumably parallel to ``doc_ids`` -- TODO confirm.
    docs: Sequence[str]
55 56
[docs] 57@dataclass 58class SearchBatch: 59 query_ids: Sequence[str] 60 queries: Sequence[str] 61 doc_ids: Sequence[Sequence[str]] | None = None 62 qrels: List[Dict[str, int]] | None = None