Skip to content

Commit 32edb6c

Browse files
driftxmichaelsembwever
authored andcommitted
CNDB-15632: CNDB-15157: limit log lines during commitlog replay (#2001)
Very long log lines can be produced during commitlog replay Limits the amount of information printed at debug, but retains full information at trace
1 parent 43dbe78 commit 32edb6c

File tree

2 files changed

+255
-2
lines changed

2 files changed

+255
-2
lines changed

src/java/org/apache/cassandra/db/commitlog/CommitLogReplayer.java

Lines changed: 48 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -200,7 +200,35 @@ public static CommitLogReplayer construct(CommitLog commitLog, UUID localHostId)
200200
cfPersisted.put(cfs.metadata.id, filter);
201201
}
202202
CommitLogPosition globalPosition = firstNotCovered(cfPersisted.values());
203-
logger.debug("Global replay position is {} from columnfamilies {}", globalPosition, FBUtilities.toString(cfPersisted));
203+
204+
// Limit the amount of column family data logged to prevent massive log lines
205+
if (logger.isDebugEnabled())
206+
{
207+
int maxColumnFamiliesToLog = 10;
208+
int cfCount = cfPersisted.size();
209+
if (cfCount <= maxColumnFamiliesToLog)
210+
{
211+
logger.debug("Global replay position is {} from {} columnfamilies: {}",
212+
globalPosition, cfCount, FBUtilities.toString(cfPersisted));
213+
}
214+
else
215+
{
216+
// For large numbers of column families, just log the count and a sample
217+
Map<TableId, IntervalSet<CommitLogPosition>> sample = new HashMap<>();
218+
int count = 0;
219+
for (Map.Entry<TableId, IntervalSet<CommitLogPosition>> entry : cfPersisted.entrySet())
220+
{
221+
if (count++ >= maxColumnFamiliesToLog)
222+
break;
223+
sample.put(entry.getKey(), entry.getValue());
224+
}
225+
logger.debug("Global replay position is {} from {} columnfamilies (showing first {}): {}",
226+
globalPosition, cfCount, maxColumnFamiliesToLog, FBUtilities.toString(sample));
227+
logger.debug("Use TRACE level to see all {} columnfamilies", cfCount);
228+
logger.trace("Full columnfamilies list: {}", FBUtilities.toString(cfPersisted));
229+
}
230+
}
231+
204232
return new CommitLogReplayer(commitLog, globalPosition, cfPersisted, replayFilter);
205233
}
206234

@@ -459,7 +487,25 @@ public static IntervalSet<CommitLogPosition> persistedIntervals(Iterable<SSTable
459487

460488
if (!skippedSSTables.isEmpty()) {
461489
logger.warn("Origin of {} sstables is unknown or doesn't match the local node; commitLogIntervals for them were ignored", skippedSSTables.size());
462-
logger.debug("Ignored commitLogIntervals from the following sstables: {}", skippedSSTables);
490+
491+
// Limit the number of SSTable names logged to prevent massive log lines
492+
int maxSSTablesToLog = 100;
493+
if (skippedSSTables.size() <= maxSSTablesToLog) {
494+
logger.debug("Ignored commitLogIntervals from the following sstables: {}", skippedSSTables);
495+
} else {
496+
List<String> sample = new ArrayList<>();
497+
int count = 0;
498+
for (String sstable : skippedSSTables)
499+
{
500+
if (count++ >= maxSSTablesToLog)
501+
break;
502+
sample.add(sstable);
503+
}
504+
logger.debug("Ignored commitLogIntervals from {} sstables (showing first {}): {}",
505+
skippedSSTables.size(), maxSSTablesToLog, sample);
506+
logger.debug("Use TRACE level to see all {} skipped sstables", skippedSSTables.size());
507+
logger.trace("Full list of ignored sstables: {}", skippedSSTables);
508+
}
463509
}
464510

465511
if (truncatedAt != null)

test/unit/org/apache/cassandra/db/commitlog/CommitLogReplayerTest.java

Lines changed: 207 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,20 +18,40 @@
1818

1919
package org.apache.cassandra.db.commitlog;
2020

21+
22+
import java.util.ArrayList;
23+
import java.util.Collections;
24+
import java.util.List;
25+
import java.util.UUID;
26+
import java.util.concurrent.CompletableFuture;
27+
import java.util.concurrent.CompletionException;
2128
import java.util.concurrent.ExecutionException;
2229

30+
import org.junit.After;
2331
import org.junit.Assert;
2432
import org.junit.BeforeClass;
2533
import org.junit.Test;
2634
import org.junit.runner.RunWith;
2735

36+
import org.slf4j.LoggerFactory;
37+
38+
import ch.qos.logback.classic.Level;
39+
import ch.qos.logback.classic.spi.ILoggingEvent;
40+
import ch.qos.logback.core.AppenderBase;
2841
import org.apache.cassandra.config.DatabaseDescriptor;
2942
import org.apache.cassandra.db.Mutation;
3043
import org.apache.cassandra.utils.concurrent.Future;
44+
import org.apache.cassandra.io.sstable.format.SSTableReader;
45+
import org.apache.cassandra.io.sstable.metadata.StatsMetadata;
46+
import org.apache.cassandra.utils.EstimatedHistogram;
47+
import org.apache.cassandra.utils.streamhist.TombstoneHistogram;
3148
import org.jboss.byteman.contrib.bmunit.BMRule;
3249
import org.jboss.byteman.contrib.bmunit.BMRules;
3350
import org.jboss.byteman.contrib.bmunit.BMUnitRunner;
3451

52+
import static org.junit.Assert.assertTrue;
53+
54+
import org.apache.cassandra.utils.ByteBufferUtil;
3555
import static org.mockito.Mockito.mock;
3656
import static org.mockito.Mockito.when;
3757

@@ -60,4 +80,191 @@ public void testTrackingSegmentsWhenMutationFails()
6080
Assert.assertTrue(!replayer.getSegmentWithInvalidOrFailedMutations().isEmpty());
6181
Assert.assertTrue(replayer.getSegmentWithInvalidOrFailedMutations().contains(failedSegment));
6282
}
83+
84+
/**
85+
* Test that when there are few skipped SSTables (<= 100), all are logged in debug mode.
86+
* This tests the fix for CNDB-15157.
87+
*/
88+
@Test
89+
public void testPersistedIntervalsLogsAllSSTablesWhenFew()
90+
{
91+
ch.qos.logback.classic.Logger logger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(CommitLogReplayer.class);
92+
InMemoryAppender appender = new InMemoryAppender();
93+
logger.addAppender(appender);
94+
Level originalLevel = logger.getLevel();
95+
logger.setLevel(Level.DEBUG);
96+
97+
try
98+
{
99+
// Create 50 mock SSTables with different host IDs (so they'll be skipped)
100+
List<SSTableReader> sstables = new ArrayList<>();
101+
UUID differentHostId = UUID.randomUUID();
102+
for (int i = 0; i < 50; i++)
103+
{
104+
SSTableReader reader = mock(SSTableReader.class);
105+
StatsMetadata metadata = createMinimalStatsMetadata(differentHostId);
106+
when(reader.getSSTableMetadata()).thenReturn(metadata);
107+
when(reader.getFilename()).thenReturn("sstable-" + i + ".db");
108+
sstables.add(reader);
109+
}
110+
111+
UUID localhostId = UUID.randomUUID();
112+
CommitLogReplayer.persistedIntervals(sstables, null, localhostId);
113+
114+
// Verify that debug log contains all sstables (not truncated)
115+
List<ILoggingEvent> debugEvents = appender.getEventsForLevel(Level.DEBUG);
116+
boolean foundFullList = false;
117+
for (ILoggingEvent event : debugEvents)
118+
{
119+
String message = event.getFormattedMessage();
120+
if (message.contains("Ignored commitLogIntervals from the following sstables:"))
121+
{
122+
foundFullList = true;
123+
// Should NOT contain the "showing first" message when count is <= 100
124+
Assert.assertFalse("Should not limit logging when <= 100 SSTables",
125+
message.contains("showing first"));
126+
break;
127+
}
128+
}
129+
assertTrue("Should have logged the full sstables list", foundFullList);
130+
}
131+
finally
132+
{
133+
logger.detachAppender(appender);
134+
logger.setLevel(originalLevel);
135+
}
136+
}
137+
138+
/**
139+
* Test that when there are many skipped SSTables (> 100), only the first 100 are logged.
140+
* This tests the fix for CNDB-15157.
141+
*/
142+
@Test
143+
public void testPersistedIntervalsLimitsLoggingWhenMany()
144+
{
145+
ch.qos.logback.classic.Logger logger = (ch.qos.logback.classic.Logger) LoggerFactory.getLogger(CommitLogReplayer.class);
146+
InMemoryAppender appender = new InMemoryAppender();
147+
logger.addAppender(appender);
148+
Level originalLevel = logger.getLevel();
149+
logger.setLevel(Level.TRACE); // Need TRACE level to capture the full list message
150+
151+
try
152+
{
153+
// Create 150 mock SSTables with different host IDs (so they'll be skipped)
154+
List<SSTableReader> sstables = new ArrayList<>();
155+
UUID differentHostId = UUID.randomUUID();
156+
for (int i = 0; i < 150; i++)
157+
{
158+
SSTableReader reader = mock(SSTableReader.class);
159+
StatsMetadata metadata = createMinimalStatsMetadata(differentHostId);
160+
when(reader.getSSTableMetadata()).thenReturn(metadata);
161+
when(reader.getFilename()).thenReturn("sstable-" + i + ".db");
162+
sstables.add(reader);
163+
}
164+
165+
UUID localhostId = UUID.randomUUID();
166+
CommitLogReplayer.persistedIntervals(sstables, null, localhostId);
167+
168+
// Verify that debug log is limited
169+
List<ILoggingEvent> debugEvents = appender.getEventsForLevel(Level.DEBUG);
170+
boolean foundLimitedList = false;
171+
boolean foundTraceMessage = false;
172+
for (ILoggingEvent event : debugEvents)
173+
{
174+
String message = event.getFormattedMessage();
175+
if (message.contains("Ignored commitLogIntervals from 150 sstables (showing first 100)"))
176+
{
177+
foundLimitedList = true;
178+
}
179+
if (message.contains("Use TRACE level to see all 150 skipped sstables"))
180+
{
181+
foundTraceMessage = true;
182+
}
183+
}
184+
assertTrue("Should have logged limited sstables list", foundLimitedList);
185+
assertTrue("Should have logged message about TRACE level", foundTraceMessage);
186+
187+
// Verify trace log has full list
188+
List<ILoggingEvent> traceEvents = appender.getEventsForLevel(Level.TRACE);
189+
boolean foundFullListInTrace = false;
190+
for (ILoggingEvent event : traceEvents)
191+
{
192+
String message = event.getFormattedMessage();
193+
if (message.contains("Full list of ignored sstables:"))
194+
{
195+
foundFullListInTrace = true;
196+
break;
197+
}
198+
}
199+
assertTrue("Should have logged full list at TRACE level", foundFullListInTrace);
200+
}
201+
finally
202+
{
203+
logger.detachAppender(appender);
204+
logger.setLevel(originalLevel);
205+
}
206+
}
207+
208+
/**
209+
* Creates a minimal StatsMetadata with only the fields needed for testing persistedIntervals.
210+
*/
211+
private static StatsMetadata createMinimalStatsMetadata(UUID originatingHostId)
212+
{
213+
return new StatsMetadata(new EstimatedHistogram(150, true),
214+
new EstimatedHistogram(150, true),
215+
IntervalSet.empty(),
216+
0L, // minTimestamp
217+
0L, // maxTimestamp
218+
Long.MAX_VALUE, // minLocalDeletionTime
219+
Long.MAX_VALUE, // maxLocalDeletionTime
220+
Integer.MAX_VALUE, // minTTL
221+
Integer.MAX_VALUE, // maxTTL
222+
0.0, // compressionRatio
223+
TombstoneHistogram.createDefault(), // estimatedTombstoneDropTime
224+
0, // sstableLevel
225+
null, // clusteringTypes
226+
null, // coveredClustering
227+
false, // hasLegacyCounterShards
228+
0L, // repairedAt
229+
0L, // totalColumnsSet
230+
0L, // totalRows
231+
0.0, // tokenSpaceCoverage
232+
originatingHostId,
233+
null, // pendingRepair
234+
false, // isTransient
235+
false, //boolean hasPartitionLevelDeletions,
236+
ByteBufferUtil.EMPTY_BYTE_BUFFER, //ByteBuffer firstKey,
237+
ByteBufferUtil.EMPTY_BYTE_BUFFER, //ByteBuffer lastKey,
238+
Collections.emptyMap(), // maxColumnValueLengths
239+
null); // zeroCopyMetadata
240+
}
241+
242+
private static class InMemoryAppender extends AppenderBase<ILoggingEvent>
243+
{
244+
private final List<ILoggingEvent> events = new ArrayList<>();
245+
246+
private InMemoryAppender()
247+
{
248+
start();
249+
}
250+
251+
@Override
252+
protected synchronized void append(ILoggingEvent event)
253+
{
254+
events.add(event);
255+
}
256+
257+
public synchronized List<ILoggingEvent> getEventsForLevel(Level level)
258+
{
259+
List<ILoggingEvent> result = new ArrayList<>();
260+
for (ILoggingEvent event : events)
261+
{
262+
if (event.getLevel() == level)
263+
{
264+
result.add(event);
265+
}
266+
}
267+
return result;
268+
}
269+
}
63270
}

0 commit comments

Comments
 (0)