import { createHash } from 'crypto';

/** Upper bound on the returned name (Bedrock DocumentBlock.name accepts at most 200 characters). */
const MAX_SAFE_FILENAME_LENGTH = 200;

/** Number of leading hex characters of the MD5 digest kept in the suffix. */
const HASH_SUFFIX_LENGTH = 8;

/**
 * Convert a filename into a name that AWS Bedrock accepts as DocumentBlock.name.
 *
 * Bedrock only allows alphanumeric characters, whitespace (no more than one
 * whitespace character in a row), hyphens, parentheses and square brackets,
 * with a length between 1 and 200 characters.
 *
 * Steps:
 * 1. Drop the extension (text after the last '.', unless the dot is the first character).
 * 2. Replace every non-allowed character with '_'.
 * 3. Collapse each run of two or more whitespace characters into a single space.
 * 4. If the name was altered, is empty, or is too long, append `_` plus the
 *    first 8 hex characters of the MD5 of the ORIGINAL filename, truncating the
 *    sanitized part so the result never exceeds 200 characters.
 *
 * The hash keeps different originals distinguishable after lossy replacement
 * (e.g. two Japanese names of equal length) and is stable for the same input.
 *
 * NOTE(review): '_' itself is not in the allowed character list above; the
 * existing output format is preserved here for compatibility — confirm that
 * Bedrock accepts underscores, otherwise switch the placeholder/separator to '-'.
 *
 * @param filename Original filename, possibly containing an extension
 * @returns Sanitized name; unchanged (minus extension) when it was already valid
 */
export const convertToSafeFilename = (filename: string): string => {
  const lastDotIndex = filename.lastIndexOf('.');
  // `> 0` (not `>= 0`) so dot-files such as ".env" keep their whole name.
  const nameWithoutExt =
    lastDotIndex > 0 ? filename.substring(0, lastDotIndex) : filename;

  const safeName = nameWithoutExt
    .replace(/[^a-zA-Z0-9\s\-()[\]]/g, '_')
    // Bedrock rejects consecutive whitespace, so squash runs to one space.
    .replace(/\s{2,}/g, ' ');

  const wasAltered = safeName !== nameWithoutExt;
  const isEmpty = safeName.length === 0;
  const isTooLong = safeName.length > MAX_SAFE_FILENAME_LENGTH;

  if (!wasAltered && !isEmpty && !isTooLong) {
    return safeName;
  }

  // Hash the original filename (extension included) so the suffix is unique
  // per source file even when the sanitized stems collide.
  const hash = createHash('md5')
    .update(filename)
    .digest('hex')
    .substring(0, HASH_SUFFIX_LENGTH);
  const suffix = `_${hash}`;

  // Leave room for the suffix; a no-op for names that are already short enough.
  const stem = safeName.substring(0, MAX_SAFE_FILENAME_LENGTH - suffix.length);
  return `${stem}${suffix}`;
};
it('should add hash when Japanese characters are present', () => { + const result = convertToSafeFilename('資料.pdf'); + expect(result).toBe('___46a890b2'); + }); + + it('should add hash when mixed Japanese and ASCII characters', () => { + const result = convertToSafeFilename('report資料2024.pdf'); + expect(result).toBe('report__2024_f3805637'); + }); + + it('should generate different hashes for different Japanese filenames with same length', () => { + const result1 = convertToSafeFilename('資料.pdf'); + const result2 = convertToSafeFilename('書類.pdf'); + expect(result1).toBe('___46a890b2'); + expect(result2).toBe('___5c4aa342'); + expect(result1).not.toBe(result2); + }); + + it('should generate consistent hash for same filename', () => { + const result1 = convertToSafeFilename('資料.pdf'); + const result2 = convertToSafeFilename('資料.pdf'); + expect(result1).toBe('___46a890b2'); + expect(result2).toBe('___46a890b2'); + }); + + it('should handle filename without extension', () => { + const result = convertToSafeFilename('document'); + expect(result).toBe('document'); + }); + + it('should handle filename with multiple dots', () => { + const result = convertToSafeFilename('report.final.pdf'); + expect(result).toBe('report_final_8d101382'); + }); + + it('should replace special characters with underscore and add hash', () => { + const result = convertToSafeFilename('file@#$.pdf'); + expect(result).toBe('file____cf25ced4'); + }); +});