Skip to content

Commit

Permalink
adjust log msg
Browse files (browse the repository at this point in the history)
  • Loading branch information
manuelburger committed Nov 5, 2024
1 parent 72dc3a0 commit 3b46e69
Showing 1 changed file with 4 additions and 2 deletions.
6 changes: 4 additions & 2 deletions src/nanotron/data/petagraph_dataset.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -206,7 +206,9 @@ def __init__(self,
self.iterable_dataset = iter(sequences_unbatched)
else:
self.iterable_dataset = self.cyclic_iter(sequences_unbatched)
self.logging_func(f"Sample: {next(self.iterable_dataset)}")

sample = next(self.iterable_dataset)
self.logging_func(f"Sample ({len(sample)}: {sample[:32]}")

self.logging_func(f"Pipeline warmup:")
warmup_sample_size = 1024
Expand Down Expand Up @@ -298,7 +300,7 @@ def find_overlaps_and_build_graph(sequences, k_mer: int = 31):
graph = defaultdict(list)

# Check for overlaps
for i, seq1 in tqdm(enumerate(sequences), total=len(sequences)):
for i, seq1 in enumerate(sequences):
seq1_suffix = seq1[-min_overlap:]
graph[i] = []
for j in prefix_dict[seq1_suffix]:
Expand Down

0 comments on commit 3b46e69

Please sign in to comment.