Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion packages/cdk/lambda/utils/bedrockAgentApi.ts
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import {
BraveSearchResult,
} from 'generative-ai-use-cases';
import { streamingChunk } from './streamingChunk';
import { convertToSafeFilename } from './fileNameUtils';
import {
initBedrockAgentClient,
initBedrockAgentRuntimeClient,
Expand Down Expand Up @@ -127,7 +128,7 @@ const bedrockAgentApi: ApiInterface = {
files: messages
.flatMap((m: UnrecordedMessage) => {
return m.extraData?.map((file) => ({
name: file.name.replace(/[^a-zA-Z0-9\s\-()[\].]/g, 'X'), // If the file name contains Japanese, it is not recognized, so replace it
name: convertToSafeFilename(file.name),
source: {
sourceType: 'BYTE_CONTENT',
byteContent: {
Expand Down
27 changes: 27 additions & 0 deletions packages/cdk/lambda/utils/fileNameUtils.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
import crypto from 'crypto';

/**
 * Convert a filename into a document name accepted by the AWS Bedrock API.
 *
 * Bedrock's DocumentBlock.name only allows alphanumeric characters, whitespace
 * (no more than one in a row), hyphens, parentheses and square brackets, and
 * is limited to 200 characters. This function:
 * - drops the extension (everything after the last dot, unless the dot is the
 *   first character),
 * - replaces every character outside the allowed set with '_',
 * - collapses each run of whitespace into a single ASCII space,
 * - truncates the result so it never exceeds 200 characters.
 *
 * When any of these steps changed the name, an 8-character hash of the
 * original filename is appended so that different originals which sanitize to
 * the same text still yield different names.
 *
 * @param filename Original filename
 * @returns Safe name; carries a `_<hash>` suffix only if the name had to be altered
 */
export const convertToSafeFilename = (filename: string): string => {
  // Maximum length Bedrock accepts for DocumentBlock.name.
  const maxNameLength = 200;

  // A dot at index 0 (e.g. ".env") is treated as part of the name, not as an
  // extension separator.
  const lastDotIndex = filename.lastIndexOf('.');
  const nameWithoutExt =
    lastDotIndex > 0 ? filename.substring(0, lastDotIndex) : filename;

  const safeName = nameWithoutExt
    .replace(/[^a-zA-Z0-9\s\-()[\]]/g, '_')
    // `\s` also matches tabs, newlines and Unicode spaces such as U+3000, and
    // Bedrock rejects consecutive whitespace, so normalize every run to ' '.
    .replace(/\s+/g, ' ');

  // Nothing was replaced, collapsed or needs truncating: keep the name as is.
  if (safeName === nameWithoutExt && safeName.length <= maxNameLength) {
    return safeName;
  }

  // MD5 is used only as a short, stable fingerprint of the original filename,
  // not for any security purpose.
  const hash = crypto
    .createHash('md5')
    .update(filename)
    .digest('hex')
    .substring(0, 8);
  const suffix = `_${hash}`;

  // Reserve room for the suffix so the final name stays within the limit.
  return `${safeName.substring(0, maxNameLength - suffix.length)}${suffix}`;
};
5 changes: 2 additions & 3 deletions packages/cdk/lambda/utils/models.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ import {
applyAutoCacheToSystem,
} from './promptCache';
import { getFormatFromMimeType, getMimeTypeFromFileName } from './media';
import { convertToSafeFilename } from './fileNameUtils';

// Default Models

Expand Down Expand Up @@ -403,9 +404,7 @@ const createConverseCommandInput = (
contentBlocks.push({
document: {
format,
name: extra.name
.split('.')[0]
.replace(/[^a-zA-Z0-9\s\-()[\]]/g, 'X'), // If the file name contains Japanese, it will cause an error, so convert it
name: convertToSafeFilename(extra.name),
source: {
bytes: Buffer.from(extra.source.data, 'base64'),
},
Expand Down
54 changes: 54 additions & 0 deletions packages/cdk/test/lambda/utils/fileNameUtils.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
/* eslint-disable i18nhelper/no-jp-string */
import { convertToSafeFilename } from '../../../lambda/utils/fileNameUtils';

// Unit tests for convertToSafeFilename: names made only of allowed characters
// come back without a hash, while any replaced character yields '_' plus an
// 8-character hash suffix derived from the original filename.
describe('convertToSafeFilename', () => {
  it('should return filename without hash when only ASCII characters', () => {
    // The extension is stripped; nothing else changes.
    expect(convertToSafeFilename('document.pdf')).toBe('document');
  });

  it('should return filename without hash for ASCII with allowed special chars', () => {
    // Hyphens, spaces, parentheses and square brackets are all permitted.
    expect(convertToSafeFilename('report-2024 (final)[v1].pdf')).toBe(
      'report-2024 (final)[v1]'
    );
  });

  it('should add hash when Japanese characters are present', () => {
    // Two replaced characters plus the '_' separator precede the hash.
    expect(convertToSafeFilename('資料.pdf')).toBe('___46a890b2');
  });

  it('should add hash when mixed Japanese and ASCII characters', () => {
    expect(convertToSafeFilename('report資料2024.pdf')).toBe(
      'report__2024_f3805637'
    );
  });

  it('should generate different hashes for different Japanese filenames with same length', () => {
    // Both names sanitize to the same underscores; only the hash tells them apart.
    const [first, second] = ['資料.pdf', '書類.pdf'].map((name) =>
      convertToSafeFilename(name)
    );
    expect(first).toBe('___46a890b2');
    expect(second).toBe('___5c4aa342');
    expect(first).not.toBe(second);
  });

  it('should generate consistent hash for same filename', () => {
    // Two independent calls with the same input must agree.
    const firstCall = convertToSafeFilename('資料.pdf');
    const secondCall = convertToSafeFilename('資料.pdf');
    expect(firstCall).toBe('___46a890b2');
    expect(secondCall).toBe('___46a890b2');
  });

  it('should handle filename without extension', () => {
    expect(convertToSafeFilename('document')).toBe('document');
  });

  it('should handle filename with multiple dots', () => {
    // Only the last dot separates the extension; inner dots are replaced.
    expect(convertToSafeFilename('report.final.pdf')).toBe(
      'report_final_8d101382'
    );
  });

  it('should replace special characters with underscore and add hash', () => {
    expect(convertToSafeFilename('file@#$.pdf')).toBe('file____cf25ced4');
  });
});
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nice test

Loading