Source code for lightning_ir.data.data
1from dataclasses import dataclass
2from typing import Any, Dict, List, Sequence
3
4import torch
5
6
[docs]
7@dataclass
8class RankSample:
9 query_id: str
10 query: str
11 doc_ids: Sequence[str]
12 docs: Sequence[str]
13 targets: torch.Tensor | None = None
14 qrels: List[Dict[str, Any]] | None = None
15
16
[docs]
@dataclass
class QuerySample:
    """A single query: its identifier and its text."""

    # Identifier of the query.
    query_id: str
    # Text of the query.
    query: str

    @classmethod
    def from_ir_dataset_sample(cls, sample) -> "QuerySample":
        """Build a ``QuerySample`` from an indexable record.

        Args:
            sample: Any object supporting integer indexing. Position 0 is
                used as the query id and position 1 as the query text.
                Presumably an ``ir_datasets`` query record -- confirm with
                the caller.

        Returns:
            A new instance holding ``sample[0]`` and ``sample[1]``.
        """
        return cls(sample[0], sample[1])
25
26
[docs]
@dataclass
class DocSample:
    """A single document: its identifier and its text."""

    # Identifier of the document.
    doc_id: str
    # Text of the document.
    doc: str

    @classmethod
    def from_ir_dataset_sample(cls, sample) -> "DocSample":
        """Build a ``DocSample`` from a record exposing ``default_text()``.

        Args:
            sample: Object supporting integer indexing and providing a
                ``default_text()`` method. Position 0 is used as the
                document id; the text comes from ``default_text()`` rather
                than from a fixed position. Presumably an ``ir_datasets``
                document record -- confirm with the caller.

        Returns:
            A new instance holding ``sample[0]`` and ``sample.default_text()``.
        """
        return cls(sample[0], sample.default_text())
35
36
[docs]
37@dataclass
38class RankBatch:
39 queries: Sequence[str]
40 docs: Sequence[Sequence[str]]
41 query_ids: Sequence[str] | None = None
42 doc_ids: Sequence[Sequence[str]] | None = None
43 qrels: List[Dict[str, int]] | None = None
44
45
[docs]
@dataclass
class TrainBatch(RankBatch):
    """A ``RankBatch`` extended with an optional ``targets`` tensor."""

    # Optional tensor of targets. Shape and meaning are not fixed here.
    # NOTE(review): presumably aligned with the batch's documents -- confirm.
    targets: torch.Tensor | None = None
49
50
[docs]
@dataclass
class IndexBatch:
    """A flat batch of documents: identifiers plus texts."""

    # Document identifiers.
    doc_ids: Sequence[str]
    # Document texts; presumably aligned index-by-index with ``doc_ids`` --
    # not enforced here.
    docs: Sequence[str]
55
56
[docs]
57@dataclass
58class SearchBatch:
59 query_ids: Sequence[str]
60 queries: Sequence[str]
61 doc_ids: Sequence[Sequence[str]] | None = None
62 qrels: List[Dict[str, int]] | None = None