diff --git a/libs/community/langchain_community/document_loaders/__init__.py b/libs/community/langchain_community/document_loaders/__init__.py
index c91345daa..72d8a6a3a 100644
--- a/libs/community/langchain_community/document_loaders/__init__.py
+++ b/libs/community/langchain_community/document_loaders/__init__.py
@@ -532,6 +532,9 @@
     from langchain_community.document_loaders.yuque import (
         YuqueLoader,
     )
+    from langchain_community.document_loaders.ocr_pdf import (
+        OCRPDFLoader,
+    )
 
 
 _module_lookup = {
@@ -732,6 +735,7 @@
     "YoutubeAudioLoader": "langchain_community.document_loaders.blob_loaders",
     "YoutubeLoader": "langchain_community.document_loaders.youtube",
     "YuqueLoader": "langchain_community.document_loaders.yuque",
+    "OCRPDFLoader": "langchain_community.document_loaders.ocr_pdf",
 }
 
 
@@ -940,4 +944,5 @@ def __getattr__(name: str) -> Any:
     "YoutubeAudioLoader",
     "YoutubeLoader",
     "YuqueLoader",
+    "OCRPDFLoader",
 ]
diff --git a/libs/community/langchain_community/document_loaders/ocr_pdf.py b/libs/community/langchain_community/document_loaders/ocr_pdf.py
new file mode 100644
index 000000000..7a7be4918
--- /dev/null
+++ b/libs/community/langchain_community/document_loaders/ocr_pdf.py
@@ -0,0 +1,188 @@
+"""Loader for extracting text from scanned PDFs using OCR."""
+
+from __future__ import annotations
+
+import logging
+from pathlib import Path
+from typing import Any, Dict, Iterator, List, Optional
+
+from langchain_core.documents import Document
+
+from langchain_community.document_loaders.base import BaseLoader
+
+logger = logging.getLogger(__name__)
+
+
+class OCRPDFLoader(BaseLoader):
+    """Load scanned PDF files using OCR (Optical Character Recognition).
+
+    This loader converts PDF pages to images and applies Tesseract OCR
+    to extract text from scanned documents. One `Document` is produced per
+    page; pages that yield no text, or on which OCR raises, are logged and
+    skipped.
+
+    Setup:
+        Install required packages:
+        ```bash
+        pip install pdf2image pytesseract
+        ```
+
+        Install system dependencies:
+        - **Linux**: `sudo apt-get install poppler-utils tesseract-ocr`
+        - **macOS**: `brew install poppler tesseract`
+        - **Windows**: Download and install Poppler and Tesseract, add to PATH
+
+    Example:
+        ```python
+        from langchain_community.document_loaders import OCRPDFLoader
+
+        loader = OCRPDFLoader("scanned_document.pdf")
+        documents = loader.load()
+
+        # Access extracted text and metadata
+        for doc in documents:
+            print(f"Page {doc.metadata['page']}: {doc.page_content[:100]}...")
+        ```
+    """
+
+    def __init__(
+        self,
+        file_path: str | Path,
+        *,
+        tesseract_config: str = "",
+        poppler_path: Optional[str] = None,
+        first_page: Optional[int] = None,
+        last_page: Optional[int] = None,
+        dpi: int = 200,
+        fmt: str = "JPEG",
+    ) -> None:
+        """Initialize the OCR PDF loader.
+
+        Args:
+            file_path: Path to the PDF file to load.
+            tesseract_config: Additional configuration options for Tesseract OCR.
+                Example: "--psm 6" for uniform text blocks.
+            poppler_path: Directory containing the poppler binaries. Only needed
+                when they are not on PATH (typically on Windows).
+            first_page: First page to process (1-indexed). If None, starts from page 1.
+            last_page: Last page to process (1-indexed). If None, processes all pages.
+            dpi: Resolution for PDF to image conversion. Higher values improve
+                OCR accuracy but increase processing time.
+            fmt: Image format for conversion ("JPEG", "PNG", etc.).
+
+        Raises:
+            FileNotFoundError: If `file_path` is not an existing file.
+            ImportError: If required dependencies are not installed.
+        """
+        try:
+            import pdf2image  # noqa: F401
+            import pytesseract  # noqa: F401
+        except ImportError as e:
+            raise ImportError(
+                "OCRPDFLoader requires pdf2image and pytesseract. "
+                "Install with: pip install pdf2image pytesseract"
+            ) from e
+
+        self.file_path = Path(file_path)
+        # is_file() (not exists()) so a directory is rejected up front as well.
+        if not self.file_path.is_file():
+            raise FileNotFoundError(f"PDF file not found: {self.file_path}")
+
+        self.tesseract_config = tesseract_config
+        self.poppler_path = poppler_path
+        self.first_page = first_page
+        self.last_page = last_page
+        self.dpi = dpi
+        self.fmt = fmt
+
+    def load(self) -> List[Document]:
+        """Load all pages and return as a list of Documents.
+
+        Returns:
+            List of Document objects, one per page with extracted text.
+        """
+        return list(self.lazy_load())
+
+    def lazy_load(self) -> Iterator[Document]:
+        """Run OCR page by page and yield one Document per page with text.
+
+        The selected page range is rasterized in a single
+        `convert_from_path` call before OCR starts, so every page image is
+        held in memory at once; only the OCR step is lazy.
+
+        Yields:
+            Document objects with the stripped OCR text and metadata:
+            `source`, `page` (1-indexed number in the PDF), `total_pages`
+            (number of pages converted, i.e. the size of the requested range),
+            `loader` and `ocr_engine`.
+
+        Raises:
+            ImportError: If pdf2image or pytesseract cannot be imported.
+            RuntimeError: If the PDF cannot be converted to images. OCR errors
+                on individual pages are logged and the page is skipped.
+        """
+        try:
+            import pytesseract
+            from pdf2image import convert_from_path
+        except ImportError as e:
+            raise ImportError(
+                "Required dependencies not found. "
+                "Install with: pip install pdf2image pytesseract"
+            ) from e
+
+        conversion_kwargs: Dict[str, Any] = {
+            "pdf_path": self.file_path,
+            "dpi": self.dpi,
+            "fmt": self.fmt,
+        }
+        if self.poppler_path:
+            conversion_kwargs["poppler_path"] = self.poppler_path
+        if self.first_page:
+            conversion_kwargs["first_page"] = self.first_page
+        if self.last_page:
+            conversion_kwargs["last_page"] = self.last_page
+
+        try:
+            pages = convert_from_path(**conversion_kwargs)
+        except Exception as e:
+            raise RuntimeError(f"Failed to convert PDF to images: {e}") from e
+
+        total_pages = len(pages)
+        logger.info("Processing %s pages from %s", total_pages, self.file_path)
+
+        # pdf2image treats first_page < 1 as page 1, so clamp to keep the
+        # reported page numbers in step with the pages actually converted.
+        start_page = max(self.first_page or 1, 1)
+
+        for offset, page_image in enumerate(pages):
+            page_number = start_page + offset
+
+            ocr_kwargs: Dict[str, Any] = {"image": page_image}
+            if self.tesseract_config:
+                ocr_kwargs["config"] = self.tesseract_config
+
+            # Guard only the OCR call. With the yield outside the try block,
+            # an exception thrown into the generator at the yield is not
+            # swallowed and logged as an OCR failure.
+            try:
+                text = pytesseract.image_to_string(**ocr_kwargs).strip()
+            except Exception as e:
+                # Continue processing other pages even if one fails
+                logger.error("OCR failed for page %s: %s", page_number, e)
+                continue
+
+            # Only yield documents with non-empty text
+            if not text:
+                logger.warning("No text extracted from page %s", page_number)
+                continue
+
+            yield Document(
+                page_content=text,
+                metadata={
+                    "source": str(self.file_path),
+                    "page": page_number,
+                    "total_pages": total_pages,
+                    "loader": "OCRPDFLoader",
+                    "ocr_engine": "tesseract",
+                },
+            )
diff --git a/libs/community/tests/unit_tests/document_loaders/test_ocr_pdf.py b/libs/community/tests/unit_tests/document_loaders/test_ocr_pdf.py
new file mode 100644
index 000000000..b2798c7fd
--- /dev/null
+++ b/libs/community/tests/unit_tests/document_loaders/test_ocr_pdf.py
@@ -0,0 +1,294 @@
+"""Tests for OCR PDF Loader."""
+
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+from langchain_core.documents import Document
+
+from langchain_community.document_loaders.ocr_pdf import OCRPDFLoader
+
+
+@pytest.mark.requires("pdf2image", "pytesseract")
+class TestOCRPDFLoader:
+    """Test suite for OCRPDFLoader.
+
+    Every test constructs the loader, whose `__init__` imports pdf2image and
+    pytesseract, so the whole class is marked as requiring both packages.
+    """
+
+    def test_initialization_with_valid_path(self, tmp_path: Path) -> None:
+        """Test loader initialization with valid file path."""
+        # Create a temporary PDF file
+        pdf_file = tmp_path / "test.pdf"
+        pdf_file.write_bytes(b"fake pdf content")
+
+        loader = OCRPDFLoader(file_path=str(pdf_file))
+
+        assert loader.file_path == pdf_file
+        assert loader.tesseract_config == ""
+        assert loader.dpi == 200
+        assert loader.fmt == "JPEG"
+
+    def test_initialization_with_custom_params(self, tmp_path: Path) -> None:
+        """Test loader initialization with custom parameters."""
+        pdf_file = tmp_path / "test.pdf"
+        pdf_file.write_bytes(b"fake pdf content")
+
+        loader = OCRPDFLoader(
+            file_path=str(pdf_file),
+            tesseract_config="--psm 6",
+            dpi=300,
+            fmt="PNG",
+            first_page=2,
+            last_page=5,
+        )
+
+        assert loader.tesseract_config == "--psm 6"
+        assert loader.dpi == 300
+        assert loader.fmt == "PNG"
+        assert loader.first_page == 2
+        assert loader.last_page == 5
+
+    def test_initialization_file_not_found(self) -> None:
+        """Test loader initialization with non-existent file."""
+        with pytest.raises(FileNotFoundError, match="PDF file not found"):
+            OCRPDFLoader("nonexistent.pdf")
+
+    def test_missing_dependencies(self, tmp_path: Path) -> None:
+        """Test error handling when dependencies are missing."""
+        pdf_file = tmp_path / "test.pdf"
+        pdf_file.write_bytes(b"fake pdf content")
+
+        # A None entry in sys.modules makes `import <name>` raise ImportError,
+        # which simulates the packages being absent without patching
+        # builtins.__import__ (that would break every import in the process).
+        with patch.dict("sys.modules", {"pdf2image": None, "pytesseract": None}):
+            with pytest.raises(ImportError, match="OCRPDFLoader requires pdf2image"):
+                OCRPDFLoader(str(pdf_file))
+
+    @patch("pdf2image.convert_from_path")
+    @patch("pytesseract.image_to_string")
+    def test_load_with_mocked_ocr(
+        self, mock_ocr: MagicMock, mock_convert: MagicMock, tmp_path: Path
+    ) -> None:
+        """Test load() method with mocked OCR results."""
+        pdf_file = tmp_path / "test.pdf"
+        pdf_file.write_bytes(b"fake pdf content")
+
+        # Mock dependencies
+        mock_pages = [MagicMock(), MagicMock(), MagicMock()]
+        mock_convert.return_value = mock_pages
+        mock_ocr.side_effect = [
+            "Text from page 1",
+            "Text from page 2",
+            "",  # Empty text (should be skipped)
+        ]
+
+        loader = OCRPDFLoader(str(pdf_file))
+        documents = loader.load()
+
+        # Should only return 2 documents (skipping empty text)
+        assert len(documents) == 2
+
+        # Check first document
+        assert isinstance(documents[0], Document)
+        assert documents[0].page_content == "Text from page 1"
+        assert documents[0].metadata == {
+            "source": str(pdf_file),
+            "page": 1,
+            "total_pages": 3,
+            "loader": "OCRPDFLoader",
+            "ocr_engine": "tesseract",
+        }
+
+        # Check second document
+        assert documents[1].page_content == "Text from page 2"
+        assert documents[1].metadata["page"] == 2
+
+    @patch("pdf2image.convert_from_path")
+    @patch("pytesseract.image_to_string")
+    def test_lazy_load_with_mocked_ocr(
+        self, mock_ocr: MagicMock, mock_convert: MagicMock, tmp_path: Path
+    ) -> None:
+        """Test lazy_load() method with mocked OCR results."""
+        pdf_file = tmp_path / "test.pdf"
+        pdf_file.write_bytes(b"fake pdf content")
+
+        mock_pages = [MagicMock(), MagicMock()]
+        mock_convert.return_value = mock_pages
+        mock_ocr.side_effect = ["Page 1 content", "Page 2 content"]
+
+        loader = OCRPDFLoader(str(pdf_file))
+        documents = list(loader.lazy_load())
+
+        assert len(documents) == 2
+        assert all(isinstance(doc, Document) for doc in documents)
+
+    @patch("pdf2image.convert_from_path")
+    @patch("pytesseract.image_to_string")
+    def test_load_with_tesseract_config(
+        self, mock_ocr: MagicMock, mock_convert: MagicMock, tmp_path: Path
+    ) -> None:
+        """Test OCR with custom Tesseract configuration."""
+        pdf_file = tmp_path / "test.pdf"
+        pdf_file.write_bytes(b"fake pdf content")
+
+        mock_page = MagicMock()
+        mock_convert.return_value = [mock_page]
+        mock_ocr.return_value = "OCR result"
+
+        loader = OCRPDFLoader(str(pdf_file), tesseract_config="--psm 6")
+        list(loader.lazy_load())
+
+        # Verify OCR was called with config
+        mock_ocr.assert_called_once_with(image=mock_page, config="--psm 6")
+
+    @patch("pdf2image.convert_from_path")
+    @patch("pytesseract.image_to_string")
+    def test_load_with_page_range(
+        self, mock_ocr: MagicMock, mock_convert: MagicMock, tmp_path: Path
+    ) -> None:
+        """Test loading with specific page range."""
+        pdf_file = tmp_path / "test.pdf"
+        pdf_file.write_bytes(b"fake pdf content")
+
+        mock_pages = [MagicMock(), MagicMock()]
+        mock_convert.return_value = mock_pages
+        mock_ocr.side_effect = ["Page 3 content", "Page 4 content"]
+
+        loader = OCRPDFLoader(str(pdf_file), first_page=3, last_page=4)
+        documents = loader.load()
+
+        # Check that convert_from_path was called with page range
+        mock_convert.assert_called_once()
+        call_kwargs = mock_convert.call_args[1]
+        assert call_kwargs["first_page"] == 3
+        assert call_kwargs["last_page"] == 4
+
+        # Check document metadata reflects correct page numbers
+        assert documents[0].metadata["page"] == 3
+        assert documents[1].metadata["page"] == 4
+
+    @patch("pdf2image.convert_from_path")
+    def test_conversion_error_handling(
+        self, mock_convert: MagicMock, tmp_path: Path
+    ) -> None:
+        """Test error handling during PDF to image conversion."""
+        pdf_file = tmp_path / "test.pdf"
+        pdf_file.write_bytes(b"fake pdf content")
+
+        mock_convert.side_effect = RuntimeError("Conversion failed")
+
+        loader = OCRPDFLoader(str(pdf_file))
+
+        with pytest.raises(RuntimeError, match="Failed to convert PDF to images"):
+            list(loader.lazy_load())
+
+    @patch("pdf2image.convert_from_path")
+    @patch("pytesseract.image_to_string")
+    def test_ocr_error_handling(
+        self,
+        mock_ocr: MagicMock,
+        mock_convert: MagicMock,
+        tmp_path: Path,
+        caplog: pytest.LogCaptureFixture,
+    ) -> None:
+        """Test error handling during OCR processing."""
+        pdf_file = tmp_path / "test.pdf"
+        pdf_file.write_bytes(b"fake pdf content")
+
+        mock_pages = [MagicMock(), MagicMock()]
+        mock_convert.return_value = mock_pages
+        mock_ocr.side_effect = [
+            "Successful OCR",
+            RuntimeError("OCR failed"),
+        ]
+
+        loader = OCRPDFLoader(str(pdf_file))
+        documents = loader.load()
+
+        # Should return only the successful document
+        assert len(documents) == 1
+        assert documents[0].page_content == "Successful OCR"
+
+        # Should log error for failed page
+        assert "OCR failed for page 2" in caplog.text
+
+    @patch("pdf2image.convert_from_path")
+    @patch("pytesseract.image_to_string")
+    def test_empty_text_filtering(
+        self,
+        mock_ocr: MagicMock,
+        mock_convert: MagicMock,
+        tmp_path: Path,
+        caplog: pytest.LogCaptureFixture,
+    ) -> None:
+        """Test that pages with empty OCR results are filtered out."""
+        pdf_file = tmp_path / "test.pdf"
+        pdf_file.write_bytes(b"fake pdf content")
+
+        mock_pages = [MagicMock(), MagicMock()]
+        mock_convert.return_value = mock_pages
+        mock_ocr.side_effect = [
+            "Good content",
+            "   \n\t  ",
+        ]  # Second is whitespace only
+
+        loader = OCRPDFLoader(str(pdf_file))
+        documents = loader.load()
+
+        # Should only return document with actual content
+        assert len(documents) == 1
+        assert documents[0].page_content == "Good content"
+
+        # Should log warning for empty page
+        assert "No text extracted from page 2" in caplog.text
+
+    def test_pathlib_path_support(self, tmp_path: Path) -> None:
+        """Test that loader accepts pathlib.Path objects."""
+        pdf_file = tmp_path / "test.pdf"
+        pdf_file.write_bytes(b"fake pdf content")
+
+        # Should accept Path object directly
+        loader = OCRPDFLoader(pdf_file)
+        assert loader.file_path == pdf_file
+
+    @patch("pdf2image.convert_from_path")
+    @patch("pytesseract.image_to_string")
+    def test_poppler_path_parameter(
+        self, mock_ocr: MagicMock, mock_convert: MagicMock, tmp_path: Path
+    ) -> None:
+        """Test that poppler_path is passed to convert_from_path."""
+        pdf_file = tmp_path / "test.pdf"
+        pdf_file.write_bytes(b"fake pdf content")
+
+        mock_convert.return_value = [MagicMock()]
+        mock_ocr.return_value = "Test content"
+
+        loader = OCRPDFLoader(str(pdf_file), poppler_path="/custom/poppler/path")
+        list(loader.lazy_load())
+
+        # Verify poppler_path was passed
+        call_kwargs = mock_convert.call_args[1]
+        assert call_kwargs["poppler_path"] == "/custom/poppler/path"
+
+    @patch("pdf2image.convert_from_path")
+    @patch("pytesseract.image_to_string")
+    def test_dpi_and_format_parameters(
+        self, mock_ocr: MagicMock, mock_convert: MagicMock, tmp_path: Path
+    ) -> None:
+        """Test that DPI and format parameters are passed correctly."""
+        pdf_file = tmp_path / "test.pdf"
+        pdf_file.write_bytes(b"fake pdf content")
+
+        mock_convert.return_value = [MagicMock()]
+        mock_ocr.return_value = "Test content"
+
+        loader = OCRPDFLoader(str(pdf_file), dpi=300, fmt="PNG")
+        list(loader.lazy_load())
+
+        # Verify parameters were passed
+        call_kwargs = mock_convert.call_args[1]
+        assert call_kwargs["dpi"] == 300
+        assert call_kwargs["fmt"] == "PNG"