Skip to content

Commit 546b71c

Browse files
k-rusdjatnieks
authored andcommitted
CNDB-12503 remove column name restrictions in SAI (#1539)
Trying to create an SAI or other index on a column, which name is either long or has non-alphanumeric characters, fails, while the table was successfully created with such name. This happens if an index name is not provided. The reason for this limitation is that the column name is used for generating the default index name, which is used in the file names. Fixes riptano/cndb#12503, fixes riptano/cndb#12609 This PR removes the check for column names during an index creation. Thus it allows to create indexes on existing columns with any names including qualified names and long names. 1. The check was preventing qualified column names with characters, which cannot be used in file names. This was never an issue as the index name construction from table and column names already removes any non-alphanumeric or non-underscore characters. 2. The check was preventing long column names. This limitation is removed by - Adding calculation of maximum amount characters, which can be used in index name, so it will allow to construct file names without exceeding the limit of 255 chars (255 is added as a constant). - Then the index name is truncated. 3. The uniqueness of index names is already fixed by existing implementation, which adds counter prefix when needed. This change to generated index name does not affect existing indexes as the existing name from `IndexMetadata` is always serialized to JSON, thus avoiding to re-generating the name. This fix required to fix CQL tester, since it was assuming that the restriction is in place. So column name regex pattern matcher is fixed in `createIndex` to allow non-restricted column names. There might still be possible corner cases on some column names used for generating index names, which will not work with `createIndex`. In such case `execute` should be used. It also includes several fixes in affected files: - Improve the name and usage of constants. - Fix warnings with minor improvements in affected files.
1 parent 3e1434a commit 546b71c

File tree

12 files changed

+278
-46
lines changed

12 files changed

+278
-46
lines changed

src/java/org/apache/cassandra/cql3/statements/schema/CreateIndexStatement.java

Lines changed: 2 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ public Keyspaces apply(Keyspaces schema)
223223
long indexesOnAllTables = StreamSupport.stream(Keyspace.all().spliterator(), false).flatMap(ks -> ks.getColumnFamilyStores().stream())
224224
.flatMap(ks -> ks.indexManager.listIndexes().stream())
225225
.map(i -> i.getIndexMetadata().getIndexClassName())
226-
.filter(otherClassName -> className.equals(otherClassName)).count();
226+
.filter(className::equals).count();
227227
guardRails.totalThreshold.guard(indexesOnAllTables + 1, indexDescription, false, state);
228228
}
229229

@@ -272,10 +272,6 @@ private void validateIndexTarget(TableMetadata table, IndexMetadata.Kind kind, I
272272
if ((kind == IndexMetadata.Kind.CUSTOM) && !SchemaConstants.isValidName(target.column.toString()))
273273
throw ire(INVALID_CUSTOM_INDEX_TARGET, target.column, SchemaConstants.NAME_LENGTH);
274274

275-
if ((kind == IndexMetadata.Kind.CUSTOM) && !SchemaConstants.isValidName(target.column.toString()))
276-
throw ire("Column '%s' is longer than the permissible name length of %d characters or" +
277-
" contains non-alphanumeric-underscore characters", target.column, SchemaConstants.NAME_LENGTH);
278-
279275
if (column.type.referencesDuration())
280276
{
281277
if (column.type.isCollection())
@@ -322,7 +318,7 @@ private String generateIndexName(KeyspaceMetadata keyspace, List<IndexTarget> ta
322318
{
323319
String baseName = targets.size() == 1
324320
? IndexMetadata.generateDefaultIndexName(tableName, targets.get(0).column)
325-
: IndexMetadata.generateDefaultIndexName(tableName);
321+
: IndexMetadata.generateDefaultIndexName(tableName, null);
326322
return keyspace.findAvailableIndexName(baseName);
327323
}
328324

src/java/org/apache/cassandra/db/lifecycle/LogRecord.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -163,7 +163,7 @@ public static LogRecord makeAbort(long updateTime)
163163

164164
public static LogRecord make(Type type, SSTable table)
165165
{
166-
String absoluteTablePath = table.descriptor.baseFileUri() + Component.separator;
166+
String absoluteTablePath = table.descriptor.baseFileUri() + Component.SEPARATOR;
167167
return make(type, getExistingFiles(absoluteTablePath), table.getComponentSize(), absoluteTablePath);
168168
}
169169

@@ -172,7 +172,7 @@ public static Map<SSTable, LogRecord> make(Type type, Iterable<SSTableReader> ta
172172
// contains a mapping from sstable absolute path (everything up until the 'Data'/'Index'/etc part of the filename) to the sstable
173173
Map<String, SSTable> absolutePaths = new HashMap<>();
174174
for (SSTableReader table : tables)
175-
absolutePaths.put(table.descriptor.baseFileUri() + Component.separator, table);
175+
absolutePaths.put(table.descriptor.baseFileUri() + Component.SEPARATOR, table);
176176

177177
// maps sstable base file name to the actual files on disk
178178
Map<String, List<File>> existingFiles = getExistingFiles(absolutePaths.keySet());

src/java/org/apache/cassandra/index/sai/disk/format/Version.java

Lines changed: 45 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@
3636
import org.apache.cassandra.index.sai.disk.v6.V6OnDiskFormat;
3737
import org.apache.cassandra.index.sai.disk.v7.V7OnDiskFormat;
3838
import org.apache.cassandra.index.sai.utils.TypeUtil;
39+
import org.apache.cassandra.io.sstable.format.bti.BtiFormat;
40+
import org.apache.cassandra.schema.SchemaConstants;
3941
import org.apache.cassandra.utils.bytecomparable.ByteComparable;
4042

4143
import static com.google.common.base.Preconditions.checkArgument;
@@ -102,6 +104,48 @@ public static Version latest()
102104
return LATEST;
103105
}
104106

107+
/**
108+
* Calculates the maximum allowed length for SAI index names to ensure generated filenames
109+
* do not exceed the system's filename length limit (defined in {@link SchemaConstants#FILENAME_LENGTH}).
110+
* This accounts for all additional components in the filename.
111+
*/
112+
public static int calculateIndexNameAllowedLength()
113+
{
114+
int addedLength = getAddedLengthFromDescriptorAndVersion();
115+
assert addedLength < SchemaConstants.FILENAME_LENGTH;
116+
return SchemaConstants.FILENAME_LENGTH - addedLength;
117+
}
118+
119+
/**
120+
* Calculates the length of the added prefixes and suffixes from Descriptor constructor
121+
* and {@link Version#stargazerFileNameFormat}.
122+
*
123+
* @return the length of the added prefixes and suffixes
124+
*/
125+
private static int getAddedLengthFromDescriptorAndVersion()
126+
{
127+
// Prefixes and suffixes constructed by Version.stargazerFileNameFormat
128+
int versionNameLength = latest().toString().length();
129+
// room for up to 999 generations
130+
int generationLength = 3 + SAI_SEPARATOR.length();
131+
int addedLength = SAI_DESCRIPTOR.length()
132+
+ versionNameLength
133+
+ generationLength
134+
+ IndexComponentType.PRIMARY_KEY_BLOCK_OFFSETS.representation.length()
135+
+ SAI_SEPARATOR.length() * 3
136+
+ EXTENSION.length();
137+
138+
// Prefixes from Descriptor constructor
139+
int separatorLength = 1;
140+
int indexVersionLength = 2;
141+
int tableIdLength = 28;
142+
addedLength += indexVersionLength
143+
+ BtiFormat.NAME.length()
144+
+ tableIdLength
145+
+ separatorLength * 3;
146+
return addedLength;
147+
}
148+
105149
@Override
106150
public int hashCode()
107151
{
@@ -183,7 +227,7 @@ default String format(IndexComponentType indexComponentType, IndexContext indexC
183227
*/
184228
public static Optional<ParsedFileName> tryParseFileName(String filename)
185229
{
186-
if (!filename.endsWith(".db"))
230+
if (!filename.endsWith(EXTENSION))
187231
return Optional.empty();
188232

189233
// For flexibility, we handle both "full" filename, of the form "<descriptor>-SAI+....db", or just the component

src/java/org/apache/cassandra/io/sstable/Component.java

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@
4141
*/
4242
public class Component
4343
{
44-
public static final char separator = '-';
44+
public static final char SEPARATOR = '-';
4545

4646
/**
4747
* WARNING: Be careful while changing the names or string representation of the enum
@@ -230,8 +230,8 @@ public boolean isValidFor(Descriptor descriptor)
230230
public File getFile(String absolutePath)
231231
{
232232
File ret;
233-
if (absolutePath.lastIndexOf(separator) != (absolutePath.length() - 1))
234-
ret = new File(PathUtils.getPath(absolutePath + separator + name));
233+
if (absolutePath.lastIndexOf(SEPARATOR) != (absolutePath.length() - 1))
234+
ret = new File(PathUtils.getPath(absolutePath + SEPARATOR + name));
235235
else
236236
ret = new File(PathUtils.getPath(absolutePath + name));
237237

src/java/org/apache/cassandra/io/sstable/Descriptor.java

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@
4646
import org.apache.cassandra.utils.Pair;
4747

4848
import static com.google.common.base.Preconditions.checkNotNull;
49-
import static org.apache.cassandra.io.sstable.Component.separator;
49+
import static org.apache.cassandra.io.sstable.Component.SEPARATOR;
5050
import static org.apache.cassandra.utils.TimeUUID.Generator.nextTimeUUID;
5151

5252
/**
@@ -179,7 +179,7 @@ public File tmpFileForStreaming(Component component)
179179

180180
public String filenameFor(Component component)
181181
{
182-
return prefix + separator + component.name();
182+
return prefix + SEPARATOR + component.name();
183183
}
184184

185185
public File fileFor(Component component)
@@ -213,9 +213,9 @@ public File baseFile()
213213

214214
private void appendFileName(StringBuilder buff)
215215
{
216-
buff.append(version).append(separator);
216+
buff.append(version).append(SEPARATOR);
217217
buff.append(id.toString());
218-
buff.append(separator).append(version.format.name());
218+
buff.append(SEPARATOR).append(version.format.name());
219219
}
220220

221221
public String baseFileUri()
@@ -237,7 +237,7 @@ public String relativeFilenameFor(Component component)
237237
}
238238

239239
appendFileName(buff);
240-
buff.append(separator).append(component.name());
240+
buff.append(SEPARATOR).append(component.name());
241241
return buff.toString();
242242
}
243243

src/java/org/apache/cassandra/schema/IndexMetadata.java

Lines changed: 48 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,10 @@
2020

2121
import java.io.IOException;
2222
import java.lang.reflect.InvocationTargetException;
23-
import java.util.*;
23+
import java.util.HashMap;
24+
import java.util.List;
25+
import java.util.Map;
26+
import java.util.UUID;
2427
import java.util.concurrent.ConcurrentHashMap;
2528
import java.util.stream.Collectors;
2629

@@ -40,12 +43,15 @@
4043
import org.apache.cassandra.index.Index;
4144
import org.apache.cassandra.index.internal.CassandraIndex;
4245
import org.apache.cassandra.index.sai.StorageAttachedIndex;
46+
import org.apache.cassandra.index.sai.disk.format.Version;
4347
import org.apache.cassandra.index.sasi.SASIIndex;
4448
import org.apache.cassandra.io.util.DataInputPlus;
4549
import org.apache.cassandra.io.util.DataOutputPlus;
4650
import org.apache.cassandra.utils.FBUtilities;
4751
import org.apache.cassandra.utils.UUIDSerializer;
4852

53+
import javax.annotation.Nullable;
54+
4955
import static org.apache.cassandra.schema.SchemaConstants.PATTERN_NON_WORD_CHAR;
5056
import static org.apache.cassandra.schema.SchemaConstants.isValidName;
5157

@@ -58,6 +64,7 @@ public final class IndexMetadata
5864

5965
public static final Serializer serializer = new Serializer();
6066

67+
static final String INDEX_POSTFIX = "_idx";
6168
/**
6269
* A mapping of user-friendly index names to their fully qualified index class names.
6370
*/
@@ -109,20 +116,55 @@ public static IndexMetadata fromIndexTargets(List<IndexTarget> targets,
109116
return new IndexMetadata(name, newOptions, kind);
110117
}
111118

112-
public static String generateDefaultIndexName(String table, ColumnIdentifier column)
119+
/**
120+
* Generates a default index name from the table and column names.
121+
* Characters other than alphanumeric and underscore are removed.
122+
* Long index names are truncated to fit the length allowing constructing filenames.
123+
*
124+
* @param table the table name
125+
* @param column the column identifier. Can be null if the index is not column specific.
126+
* @return the generated index name
127+
*/
128+
public static String generateDefaultIndexName(String table, @Nullable ColumnIdentifier column)
113129
{
114-
return PATTERN_NON_WORD_CHAR.matcher(table + '_' + column.toString() + "_idx").replaceAll("");
130+
String indexNameUncleaned = table;
131+
if (column != null)
132+
indexNameUncleaned += '_' + column.toString();
133+
String indexNameUntrimmed = PATTERN_NON_WORD_CHAR.matcher(indexNameUncleaned).replaceAll("");
134+
String indexNameTrimmed = indexNameUntrimmed
135+
.substring(0,
136+
Math.min(calculateGeneratedIndexNameMaxLength(),
137+
indexNameUntrimmed.length()));
138+
return indexNameTrimmed + INDEX_POSTFIX;
115139
}
116140

117-
public static String generateDefaultIndexName(String table)
141+
/**
142+
* Calculates the maximum length of the generated index name to fit file names.
143+
* It includes the generated suffixes in account.
144+
* The calculation depends on how index implements file names construciton from index names.
145+
* This needs to be addressed, see CNDB-13240.
146+
*
147+
* @return the allowed length of the generated index name
148+
*/
149+
private static int calculateGeneratedIndexNameMaxLength()
118150
{
119-
return PATTERN_NON_WORD_CHAR.matcher(table + '_' + "idx").replaceAll("");
151+
// Speculative assumption that uniqueness breaker will fit into 999.
152+
// The value is used for trimming the index name if needed.
153+
// Introducing validation of index name length is TODO for CNDB-13198.
154+
int uniquenessSuffixLength = 4;
155+
int indexNameAddition = uniquenessSuffixLength + INDEX_POSTFIX.length();
156+
int allowedIndexNameLength = Version.calculateIndexNameAllowedLength();
157+
158+
assert allowedIndexNameLength >= indexNameAddition : "cannot happen with current implementation as allowedIndexNameLength is approximately 255 - ~76. However, allowedIndexNameLength was " + allowedIndexNameLength + " and indexNameAddition was " + indexNameAddition;
159+
160+
return allowedIndexNameLength - indexNameAddition;
120161
}
121162

122163
public void validate(TableMetadata table)
123164
{
124165
if (!isValidName(name, true))
125-
throw new ConfigurationException("Illegal index name " + name);
166+
throw new ConfigurationException(String.format("Index name must not be empty, or contain non-alphanumeric-underscore characters (got \"%s\")",
167+
name));
126168

127169
if (kind == null)
128170
throw new ConfigurationException("Index kind is null for index " + name);

src/java/org/apache/cassandra/schema/SchemaConstants.java

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,14 +70,21 @@ public final class SchemaConstants
7070
public static final Set<String> VIRTUAL_KEYSPACE_NAMES = ImmutableSet.of(SCHEMA_VIRTUAL_KEYSPACE_NAME, SYSTEM_VIEWS_KEYSPACE_NAME);
7171

7272
/**
73+
* Longest acceptable file name. Longer names lead to file write or read errors.
74+
*/
75+
public static final int FILENAME_LENGTH = 255;
76+
77+
/**
7378
* The longest permissible KS or CF name.
7479
*
7580
* Before CASSANDRA-16956, we used to care about not having the entire path longer than 255 characters because of
7681
* Windows support but this limit is by implementing CASSANDRA-16956 not in effect anymore.
7782
*
7883
* Note: This extended to 222 for CNDB tenant specific keyspaces.
84+
* 222 is maximum filename length of 255 chars minus a separator char and
85+
* 32 chars for table UUID.
7986
*/
80-
public static final int NAME_LENGTH = 222;
87+
public static final int NAME_LENGTH = FILENAME_LENGTH - 32 - 1;
8188

8289
// 59adb24e-f3cd-3e02-97f0-5b395827453f
8390
public static final UUID emptyVersion;

test/unit/org/apache/cassandra/cql3/CQLTester.java

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -275,11 +275,12 @@ public abstract class CQLTester
275275
public static final List<ProtocolVersion> PROTOCOL_VERSIONS = new ArrayList<>(ProtocolVersion.SUPPORTED.size());
276276

277277
private static final String CREATE_INDEX_NAME_REGEX = "(\\s*(\\w*|\"\\w*\")\\s*)";
278+
private static final String CREATE_INDEX_NAME_QUOTED_REGEX = "(\\s*(\\w*|\"[^\"]*\")\\s*)";
278279
private static final String CREATE_INDEX_REGEX = String.format("\\A\\s*CREATE(?:\\s+CUSTOM)?\\s+INDEX" +
279280
"(?:\\s+IF\\s+NOT\\s+EXISTS)?\\s*" +
280281
"%s?\\s*ON\\s+(%<s\\.)?%<s\\s*" +
281-
"(\\((?:\\s*\\w+\\s*\\()?%<s\\))?",
282-
CREATE_INDEX_NAME_REGEX);
282+
"(\\((?:\\s*\\w+\\s*\\()?%s\\))?",
283+
CREATE_INDEX_NAME_REGEX, CREATE_INDEX_NAME_QUOTED_REGEX);
283284
private static final Pattern CREATE_INDEX_PATTERN = Pattern.compile(CREATE_INDEX_REGEX, Pattern.CASE_INSENSITIVE);
284285

285286
public static final NettyOptions IMMEDIATE_CONNECTION_SHUTDOWN_NETTY_OPTIONS = new NettyOptions()
@@ -1364,26 +1365,27 @@ protected static Pair<String, String> getCreateIndexName(String keyspace, String
13641365
keyspace = parsedKeyspace;
13651366

13661367
String index = matcher.group(2);
1368+
boolean isQuotedGeneratedIndexName = false;
13671369
if (Strings.isNullOrEmpty(index))
13681370
{
13691371
String table = matcher.group(7);
13701372
if (Strings.isNullOrEmpty(table))
13711373
throw new IllegalArgumentException("Table name should be specified: " + formattedQuery);
13721374

13731375
String column = matcher.group(9);
1376+
isQuotedGeneratedIndexName = ParseUtils.isQuoted(column, '\"');
13741377

13751378
String baseName = Strings.isNullOrEmpty(column)
1376-
? IndexMetadata.generateDefaultIndexName(table)
1379+
? IndexMetadata.generateDefaultIndexName(table, null)
13771380
: IndexMetadata.generateDefaultIndexName(table, new ColumnIdentifier(column, true));
13781381

13791382
KeyspaceMetadata ks = Schema.instance.getKeyspaceMetadata(keyspace);
13801383
assertNotNull(ks);
13811384
index = ks.findAvailableIndexName(baseName);
13821385
}
1383-
13841386
index = ParseUtils.isQuoted(index, '\"')
13851387
? ParseUtils.unDoubleQuote(index)
1386-
: index.toLowerCase();
1388+
: isQuotedGeneratedIndexName ? index : index.toLowerCase();
13871389

13881390
return Pair.create(keyspace, index);
13891391
}

test/unit/org/apache/cassandra/db/DirectoriesTest.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -880,7 +880,7 @@ private void testDirectoriesSymlinksHelper(boolean oldStyle) throws IOException
880880
for (TableMetadata tm : CFM)
881881
{
882882
Path keyspacedir = Files.createDirectories(ddir2.resolve(tm.keyspace));
883-
String tabledir = tm.name + (oldStyle ? "" : Component.separator + tm.id.toHexString());
883+
String tabledir = tm.name + (oldStyle ? "" : Component.SEPARATOR + tm.id.toHexString());
884884
Files.createSymbolicLink(keyspacedir.resolve(tabledir), symlinktarget);
885885
}
886886

@@ -1147,7 +1147,7 @@ public void testHasAvailableSpaceSumming()
11471147

11481148
private String getNewFilename(TableMetadata tm, boolean oldStyle)
11491149
{
1150-
return tm.keyspace + File.pathSeparator() + tm.name + (oldStyle ? "" : Component.separator + tm.id.toHexString()) + "/na-1-big-Data.db";
1150+
return tm.keyspace + File.pathSeparator() + tm.name + (oldStyle ? "" : Component.SEPARATOR + tm.id.toHexString()) + "/na-1-big-Data.db";
11511151
}
11521152

11531153
private List<Directories.DataDirectoryCandidate> getWriteableDirectories(DataDirectory[] dataDirectories, long writeSize)

0 commit comments

Comments
 (0)