From 5ff4c7c41acee3cdf77585d86738432db614b2bf Mon Sep 17 00:00:00 2001 From: "Anastasiia.Holub" Date: Thu, 14 Aug 2025 12:26:48 +0300 Subject: [PATCH 01/65] feat: add documentation for importing, exporting, and managing AcroForm data in PDF --- .../acroforms/export-import-json/_index.md | 173 +++++++++++ .../acroforms/modifing-form/_index.md | 269 ++++++++++++++++++ .../acroforms/posting-acroform/_index.md | 190 +++++++++++++ .../acroforms/remove-form/_index.md | 211 ++++++++++++++ 4 files changed, 843 insertions(+) create mode 100644 en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md create mode 100644 en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md create mode 100644 en/python-net/advanced-operations/working-with-forms/acroforms/posting-acroform/_index.md create mode 100644 en/python-net/advanced-operations/working-with-forms/acroforms/remove-form/_index.md diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md new file mode 100644 index 000000000..54e7c2db4 --- /dev/null +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md @@ -0,0 +1,173 @@ +--- +title: Import and Export Form Data +type: docs +weight: 80 +url: /python-net/import-export-json/ +description: This section explains how to import and Export Form Data. +lastmod: "2025-08-05" +TechArticle: true +AlternativeHeadline: +Abstract: +--- + +## Import Form Data from an XML file + +Next example demonstrates how to import form data from an XML file into an existing PDF form using Aspose.PDF for Python. By binding a PDF form, loading XML data, and saving the updated file, you can quickly populate interactive form fields without manually editing each page. 
This method is ideal for automating bulk form filling, processing large datasets, and ensuring data consistency across multiple documents. + +Use the following steps: + +1. Create Form Object. +1. Bind the PDF Form. +1. Load XML Data. +1. Import XML into PDF. +1. Save Updated PDF. + +```python + + from io import FileIO, StringIO + import json + from os import path + import aspose.pdf as ap + + # Construct full file paths using the working directory + path_infile = path.join(self.workdir_path, infile) + path_datafile = path.join(self.workdir_path, datafile) + path_outfile = path.join(self.workdir_path, outfile) + + # Create a Form object from Aspose.PDF + form = ap.facades.Form() + + # Bind the input PDF form + form.bind_pdf(path_infile) + + # Import XML data into the form + with FileIO(path_datafile, "r") as f: + form.import_xml(f) + + # Save the form with imported data to a new PDF file + form.save(path_outfile) +``` + +## Export Form field Data from a PDF document to an XML file + +This example shows how to export form field data from a PDF document to an XML file using Aspose.PDF for Python. By binding a PDF form and saving its field values in XML format, you can easily store, process, or reuse the form data in other applications or workflows. This approach is ideal for data backup, analysis, and integration with external systems. + +Use the following steps: + +1. Create Form Object +1. Bind the PDF Form +1. Export Form Data +1. 
Save Field Values to XML + +```python + + from io import FileIO, StringIO + import json + from os import path + import aspose.pdf as ap + + # Combine the working directory path with the input PDF file name + path_infile = path.join(self.workdir_path, infile) + + # Combine the working directory path with the output XML file name + path_outfile = path.join(self.workdir_path, datafile) + + # Create a Form object to work with PDF form fields + form = ap.facades.Form() + + # Bind the PDF document containing the form + form.bind_pdf(path_infile) + + # Open the XML file in write mode to store exported form data + with FileIO(path_outfile, "w") as f: + # Export form field data from the PDF to the XML file + form.export_xml(f) +``` + +## + + + + + + + + + + + + + + + + + + + + + + +## Error handling + +This example demonstrates how to export form fields from a PDF to a JSON file using Aspose.PDF, including handling different statuses for each field during the export process. + +Let's break down this Aspose.PDF example step by step: + +1. Loading the Document. We load a PDF document named "Sample.pdf" from a specified directory. +1. Setting Export Options. Here, we create an instance of ExportFieldsToJsonOptions with two settings: + * `ExportPasswordValue` : This includes password fields in the export. + * `WriteIndented` : This formats the JSON output to be indented for readability +1. Export form fields to JSON. We export the form fields from the PDF file to a JSON file called "export.json" using the specified parameters. +1. Processing Export Results: + This loop iterates through the export results and prints the status of each field: + + * Success: Indicates the field was successfully exported. + * Warning: Prints any warning messages related to the field. + * Error: Prints any error messages related to the field. 
+ + +```cs +// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET +private static void ExportFieldsToJsonWithOptions() +{ + // The path to the documents directory + var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); + + // Open PDF document + using (var document = new Aspose.Pdf.Document(Path.Combine(dataDir, "Forms", "Sample.pdf"))) + { + // Create ExportFieldsToJsonOptions with specific settings + var options = new Aspose.Pdf.ExportFieldsToJsonOptions + { + ExportPasswordValue = true, + WriteIndented = true, + }; + + var exportResults = document.Form.ExportToJson(File.OpenWrite("export.json"), options); + + foreach (var result in exportResults) + { + Console.Write($"{result.FieldFullName} "); + switch (result.FieldSerializationStatus) + { + case Aspose.Pdf.FieldSerializationStatus.Success: + Console.WriteLine("Success"); + break; + case Aspose.Pdf.FieldSerializationStatus.Warning: + foreach (var messages in result.WarningMessages) + { + Console.WriteLine(messages); + } + break; + case Aspose.Pdf.FieldSerializationStatus.Error: + foreach (var messages in result.ErrorMessages) + { + Console.WriteLine(messages); + } + break; + } + } + } +} +``` + diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md new file mode 100644 index 000000000..049dc15ec --- /dev/null +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md @@ -0,0 +1,269 @@ +--- +title: Modifing AcroForm +linktitle: Modifing AcroForm +type: docs +ai_search_scope: pdf_net +ai_search_endpoint: https://docsearch.api.aspose.cloud/ask +weight: 40 +url: /net/modifing-form/ +description: Modifing form in your PDF file with Aspose.PDF for .NET library. You can Add or remove fields in existing form, getand set fieldlimit and etc. 
+lastmod: "2022-02-17" +sitemap: + changefreq: "weekly" + priority: 0.7 +--- + + +The following code snippet also work with [Aspose.PDF.Drawing](/pdf/net/drawing/) library. + +## Get or Set Field Limit + +The FormEditor class SetFieldLimit(field, limit) method allows you to set a field limit, the maximum number of characters that can be entered into a field. + +```csharp +// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET +private static void SetFieldLimit() +{ + // The path to the documents directory + var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); + + // Create FormEditor instance + using (var form = new Aspose.Pdf.Facades.FormEditor()) + { + // Bind PDF document + form.BindPdf(dataDir + "input.pdf"); + + // Set field limit for "textbox1" + form.SetFieldLimit("textbox1", 15); + + // Save PDF document + form.Save(dataDir + "SetFieldLimit_out.pdf"); + } +} +``` + +Similarly, Aspose.PDF has a method that gets the field limit using the DOM approach. The following code snippet shows the steps. + +```csharp +// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET +private static void GetFieldLimitUsingDOM() +{ + // The path to the documents directory + var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); + + // Open PDF document + using (var document = new Aspose.Pdf.Document(dataDir + "FieldLimit.pdf")) + { + // Get the field and its maximum limit + if (document.Form["textbox1"] is Aspose.Pdf.Forms.TextBoxField textBoxField) + { + Console.WriteLine("Limit: " + textBoxField.MaxLen); + } + } +} +``` + +You can also get the same value using the *Aspose.Pdf.Facades* namespace using the following code snippet. 
+ +```csharp +// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET +private static void GetFieldLimitUsingFacades() +{ + // The path to the documents directory + var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); + + // Create Form instance + using (var form = new Aspose.Pdf.Facades.Form()) + { + // Bind PDF document + form.BindPdf(dataDir + "FieldLimit.pdf"); + + // Get the field limit for "textbox1" + Console.WriteLine("Limit: " + form.GetFieldLimit("textbox1")); + } +} +``` + +## Set Custom Font for the Form Field + +Form fields in Adobe PDF files can be configured to use specific default fonts. In the early versions of Aspose.PDF, only the 14 default fonts were supported. Later releases allowed developers to apply any font. To set and update the default font used for form fields, use the DefaultAppearance(Font font, double size, Color color) class. This class can be found under the Aspose.Pdf.InteractiveFeatures namespace. To use this object, use the Field class DefaultAppearance property. + +The following code snippet shows how to set the default font for PDF form fields. 
+ +```csharp +// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET +private static void SetFormFieldFont() +{ + // The path to the documents directory + var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); + + // Open PDF document + using (var document = new Aspose.Pdf.Document(dataDir + "FormFieldFont14.pdf")) + { + // Get particular form field from document + if (document.Form["textbox1"] is Aspose.Pdf.Forms.Field field) + { + // Create font object + var font = Aspose.Pdf.Text.FontRepository.FindFont("ComicSansMS"); + + // Set the font information for form field + field.DefaultAppearance = new Aspose.Pdf.Annotations.DefaultAppearance(font, 10, System.Drawing.Color.Black); + } + + // Save PDF document + document.Save(dataDir + "FormFieldFont14_out.pdf"); + } +} +``` + +## Add/remove fields in existing form + +All the form fields are contained in the Document object's Form collection. This collection provides different methods that manage form fields, including the Delete method. If you want to delete a particular field, pass the field name as a parameter to the Delete method and then save the updated PDF document. The following code snippet shows how to delete a particular field from a PDF document. 
+ +```csharp +// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET +private static void DeleteFormField() +{ + // The path to the documents directory + var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); + + // Open PDF document + using (var document = new Aspose.Pdf.Document(dataDir + "DeleteFormField.pdf")) + { + // Delete a particular field by name + document.Form.Delete("textbox1"); + + // Save PDF document + document.Save(dataDir + "DeleteFormField_out.pdf"); + } +} +``` + + diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/posting-acroform/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/posting-acroform/_index.md new file mode 100644 index 000000000..a7a428e05 --- /dev/null +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/posting-acroform/_index.md @@ -0,0 +1,190 @@ +--- +title: Posting AcroForm Data +linktitle: Posting AcroForm +type: docs +ai_search_scope: pdf_net +ai_search_endpoint: https://docsearch.api.aspose.cloud/ask +weight: 50 +url: /net/posting-acroform-data/ +description: You can import and export form data to and XML file with Aspose.Pdf.Facades namespace in Aspose.PDF for .NET. +lastmod: "2022-02-17" +sitemap: + changefreq: "weekly" + priority: 0.7 +--- + + +{{% alert color="primary" %}} + +AcroForm is an important type of the PDF document. You can not only create and edit an AcroForm using [Aspose.Pdf.Facades namepsace](https://docs-qa.aspose.com/display/pdftemp/Aspose.Pdf.Facades+namespace), but also import and export form data to and XML file. Aspose.Pdf.Facades namespace in Aspose.PDF for .NET allows you to implement another feature of AcroForm, which helps you post an AcroForm data to an external web page. This web page then reads the post variables and uses this data for further processing. 
This feature is useful in cases where you don’t just want to keep the data in the PDF file, but also want to send it to your server and then save it in a database, etc. + +{{% /alert %}} + +## Implementation details + +The following code snippet also works with the [Aspose.PDF.Drawing](/pdf/net/drawing/) library. + +In this article, we have tried to create an AcroForm using [Aspose.Pdf.Facades namespace](https://docs-qa.aspose.com/display/pdftemp/Aspose.Pdf.Facades+namespace). We have also added a submit button and set its target URL. The following code snippets show you how to create the form and then receive the posted data on the web page. + +```csharp +// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET +private static void CreateAcroFormWithFields() +{ + // The path to the documents directory + var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); + + // Create an instance of FormEditor class and bind input and output pdf files + using (var editor = new Aspose.Pdf.Facades.FormEditor(dataDir + "input.pdf", dataDir + "output.pdf")) + { + // Create AcroForm fields - I have created only two fields for simplicity + editor.AddField(Aspose.Pdf.Facades.FieldType.Text, "firstname", 1, 100, 600, 200, 625); + editor.AddField(Aspose.Pdf.Facades.FieldType.Text, "lastname", 1, 100, 550, 200, 575); + + // Add a submit button and set target URL + editor.AddSubmitBtn("submitbutton", 1, "Submit", "http://localhost/csharptesting/show.aspx", 100, 450, 150, 475); + + // Save PDF document + editor.Save(); + } +} +``` + +{{% alert color="primary" %}} + +The following code snippet shows you how to receive the posted values on the target web page. In this example, I have used a page named Show.aspx, and I have added a single line of code to the page load method. 
+ +{{% /alert %}} + +```csharp +// Show the posted values on the target web page +Response.Write("Hello " + Request.Form.Get("firstname") + " " + Request.Form.Get("lastname")); +``` + + diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/remove-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/remove-form/_index.md new file mode 100644 index 000000000..81c753aa6 --- /dev/null +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/remove-form/_index.md @@ -0,0 +1,211 @@ +--- +title: Delete Forms from PDF in C# +linktitle: Delete Forms +type: docs +ai_search_scope: pdf_net +ai_search_endpoint: https://docsearch.api.aspose.cloud/ask +weight: 70 +url: /net/remove-form/ +description: Remove Text based on subtype/form with Aspose.PDF for .NET library. Remove all forms from the PDF. +lastmod: "2024-09-17" +sitemap: + changefreq: "weekly" + priority: 0.7 +--- + + +## Remove Text based on subtype/form + +The next code snippet creates a new Document object using an input variable that contains the path to the PDF file. The example accesses the forms present on the first page of the document and checks for forms with certain properties. If a form with the type "Typewriter" and subtype "Form" is found, it uses TextFragmentAbsorber to visit and remove all text fragments in this form. 
+ +```cs +// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET +private static void ClearTextInForm(string input, string output) +{ + // The path to the documents directory + var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); + + // Open PDF document + using (var document = new Aspose.Pdf.Document(dataDir + "TextBox.pdf")) + { + // Get the forms from the first page + var forms = document.Pages[1].Resources.Forms; + + foreach (var form in forms) + { + // Check if the form is of type "Typewriter" and subtype "Form" + if (form.IT == "Typewriter" && form.Subtype == "Form") + { + // Create a TextFragmentAbsorber to find text fragments + var absorber = new Aspose.Pdf.Text.TextFragmentAbsorber(); + absorber.Visit(form); + + // Clear the text in each fragment + foreach (var fragment in absorber.TextFragments) + { + fragment.Text = ""; + } + } + } + + // Save PDF document + document.Save(dataDir + "TextBox_out.pdf"); + } +} +``` + +## Remove Forms with "Typewriter" and a Subtype of "Form" from PDF + +This code snippet searches the forms on the first page of a PDF document for forms with an intent (IT) of "Typewriter" and a subtype (Subtype) of "Form." If both conditions are met, the form is deleted from the PDF. The modified document is then saved to the specified output file. 
+ +The Aspose.PDF library provides two ways to remove such forms from PDFs: + +```cs +// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET +private static void DeleteSpecifiedForm(string input, string output) +{ + // The path to the documents directory + var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); + + // Open PDF document + using (var document = new Aspose.Pdf.Document(dataDir + "TextField.pdf")) + { + // Get the forms from the first page + var forms = document.Pages[1].Resources.Forms; + + // Iterate through the forms and delete the ones with type "Typewriter" and subtype "Form" + for (int i = forms.Count; i > 0; i--) + { + if (forms[i].IT == "Typewriter" && forms[i].Subtype == "Form") + { + forms.Delete(forms[i].Name); + } + } + + // Save PDF document + document.Save(dataDir + "TextBox_out.pdf"); + } +} +``` + +Method 2: + +```cs +// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET +private static void DeleteSpecifiedForm(string input, string output) +{ + // The path to the documents directory + var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); + + // Open PDF document + using (var document = new Aspose.Pdf.Document(dataDir + "TextField.pdf")) + { + // Get the forms from the first page + var forms = document.Pages[1].Resources.Forms; + + // Iterate through the forms and delete the ones with type "Typewriter" and subtype "Form" + foreach (var form in forms) + { + if (form.IT == "Typewriter" && form.Subtype == "Form") + { + var name = forms.GetFormName(form); + forms.Delete(name); + } + } + + // Save PDF document + document.Save(dataDir + "TextBox_out.pdf"); + } +} +``` + +## Remove all Forms from PDF + +This code removes all form elements from the first page of a PDF document and then saves the modified document to the specified output path. 
+ +```cs +// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET +private static void RemoveAllForms(string input, string output) +{ + // The path to the documents directory + var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); + + // Open PDF document + using (var document = new Aspose.Pdf.Document(dataDir + "TextField.pdf")) + { + // Get the forms from the first page + var forms = document.Pages[1].Resources.Forms; + + // Clear all forms + forms.Clear(); + + // Save PDF document + document.Save(dataDir + "TextBox_out.pdf"); + } +} +``` \ No newline at end of file From 78ae312488d4dd6cb80021f1ec75a44bee92b933 Mon Sep 17 00:00:00 2001 From: "Anastasiia.Holub" Date: Tue, 26 Aug 2025 11:18:01 +0300 Subject: [PATCH 02/65] feat: add FDF import functionality to PDF form data handling --- .../acroforms/export-import-json/_index.md | 27 ++++++++++++++++++- 1 file changed, 26 insertions(+), 1 deletion(-) diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md index 54e7c2db4..c6db1689d 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md @@ -84,10 +84,35 @@ Use the following steps: form.export_xml(f) ``` -## +## Import Form field Data from an FDF +The 'import_data_from_fdf' method imports form field data from an FDF (Forms Data Format) file into an existing PDF form and saves the updated document. This approach is useful for pre-filling or updating PDF forms programmatically without modifying the structure of the document. +Use the following steps: + +1. Create Form Object. +1. Bind the input PDF. +1. Import the form data with import_fdf(). +1. Save the PDF with the imported data to the specified output file path. 
+ +```python + + from io import FileIO, StringIO + import json + from os import path + import aspose.pdf as ap + path_infile = path.join(self.workdir_path, infile) + path_datafile = path.join(self.workdir_path, datafile) + path_outfile = path.join(self.workdir_path, outfile) + + form = ap.facades.Form() + form.bind_pdf(path_infile) + + with FileIO(path_datafile, "r") as f: + form.import_fdf(f) + form.save(path_outfile) +``` From 078e3b3bade533a857114de1b26ee6c288c6744d Mon Sep 17 00:00:00 2001 From: "Anastasiia.Holub" Date: Wed, 27 Aug 2025 10:54:37 +0300 Subject: [PATCH 03/65] fix: update installation guide for Aspose.PDF for Rust via C++ --- en/go-cpp/get-started/installation/_index.md | 76 ++++----- .../get-started/installation/_index.md | 154 ++++++++++++++++-- 2 files changed, 177 insertions(+), 53 deletions(-) diff --git a/en/go-cpp/get-started/installation/_index.md b/en/go-cpp/get-started/installation/_index.md index 6ce974502..5dd20ce6d 100644 --- a/en/go-cpp/get-started/installation/_index.md +++ b/en/go-cpp/get-started/installation/_index.md @@ -21,65 +21,65 @@ This package includes a large file which is stored as a bzip2 archive. 1. Add the asposepdf package to Your Project: - ```sh - go get github.com/aspose-pdf/aspose-pdf-go-cpp@latest - ``` +```sh +go get github.com/aspose-pdf/aspose-pdf-go-cpp@latest +``` 2. Generate the large file: -For **macOS and linux** +- **macOS and linux** - 1. Open Terminal +1. Open Terminal - 1. List the folders of the github.com/aspose-pdf within the Go module cache: +2. List the folders of the github.com/aspose-pdf within the Go module cache: - ```sh - ls $(go env GOMODCACHE)/github.com/aspose-pdf/ - ``` +```sh +ls $(go env GOMODCACHE)/github.com/aspose-pdf/ +``` - 1. Change curent folder to the specific version folder of the package obtained in the previous step: +3. 
Change curent folder to the specific version folder of the package obtained in the previous step: - ```sh - cd $(go env GOMODCACHE)/github.com/aspose-pdf/aspose-pdf-go-cpp@vx.x.x - ``` +```sh +cd $(go env GOMODCACHE)/github.com/aspose-pdf/aspose-pdf-go-cpp@vx.x.x +``` - Replace `@vx.x.x` with the actual package version. +Replace `@vx.x.x` with the actual package version. - 1. Run go generate with superuser privileges: +4. Run go generate with superuser privileges: - ```sh - sudo go generate - ``` +```sh +sudo go generate +``` + +- **Windows** -For **Windows** +1. Open Command Prompt - 1. Open Command Prompt - - 1. List the folders of the github.com/aspose-pdf within the Go module cache: +2. List the folders of the github.com/aspose-pdf within the Go module cache: - ```cmd - for /f "delims=" %G in ('go env GOMODCACHE') do for /d %a in ("%G\github.com\aspose-pdf\*") do echo %~fa - ``` +```cmd +for /f "delims=" %G in ('go env GOMODCACHE') do for /d %a in ("%G\github.com\aspose-pdf\*") do echo %~fa +``` - 1. Change curent folder to the specific version folder of the package obtained in the previous step: +3. Change curent folder to the specific version folder of the package obtained in the previous step: - ```cmd - cd - ``` +```cmd +cd +``` - 1. Run go generate: +4. Run go generate: - ```cmd - go generate - ``` +```cmd +go generate +``` - 1. Add specific version folder of the package to the %PATH% environment variable: +5. Add specific version folder of the package to the %PATH% environment variable: - ```cmd - setx PATH "%PATH%;\lib\" - ``` +```cmd +setx PATH "%PATH%;\lib\" +``` - Replace `` with the actual path obtained from step 2. +Replace `` with the actual path obtained from step 2. 
## Testing diff --git a/en/rust-cpp/get-started/installation/_index.md b/en/rust-cpp/get-started/installation/_index.md index a9854012d..b220229ac 100644 --- a/en/rust-cpp/get-started/installation/_index.md +++ b/en/rust-cpp/get-started/installation/_index.md @@ -17,35 +17,159 @@ SoftwareApplication: rust-cpp ## Installation +### Installation from Aspose website + This package includes a large file which is stored as a bzip2 archive. -1. Download the archive Aspose.PDF for Rust via C++ from the official Aspose website. The latest (most recent) version is listed at the top and is downloaded by default when you click the Download button. It is recommended to use this latest version. Only download a previous version if needed. Example: Aspose.PDF-for-Rust-via-CPP-25.6.zip +1. **Download** the archive **Aspose.PDF for Rust via C++** from the official Aspose website. + The latest (most recent) version is listed at the top and is downloaded by default when you click the **Download** button. + It is recommended to use this latest version. Only download a previous version if needed. + Example: `Aspose.PDF-for-Rust-via-CPP-25.6.zip` + + The archive filename format is: `Aspose.PDF-for-Rust-via-CPP-YY.M.zip`, where: + - `YY` = last two digits of the year (e.g., `25` for 2025) + - `M` = month number from `1` to `12` + +2. **Extract** the archive to your chosen directory `{path}` using a suitable tool: + - On Linux/macOS: + ```bash + unzip Aspose.PDF-for-Rust-via-CPP-YY.M.zip -d {path} + ``` + - On Windows, use built-in Explorer extraction or any unzip tool (7-Zip, WinRAR). + +3. **Add** the library as a dependency in your Rust project. You can do this in two ways: + + - **Using the command line:** + ```bash + cargo add asposepdf --path {path}/asposepdf + ``` + + - **Manually editing `Cargo.toml`:** + Open your project's `Cargo.toml` and add the following under `[dependencies]`: + ```toml + [dependencies] + asposepdf = { path = "{path}/asposepdf" } + ``` + +4. 
**Build** your project (`cargo build`). On the first build, the dynamic library for your platform will be unpacked automatically from the `.bz2` archive in the `lib` folder. This may cause a short delay. + +> **Notes** +> - The `lib` folder contains all platform-specific `.bz2` archives with corresponding `.sha256` checksum files. +> - If the checksum file is missing or invalid, the build will fail. +> - Update the library by replacing the extracted files with a newer archive version. + +### Installation from GitHub + +This package includes precompiled native libraries (`.dll`, `.so`, `.dylib`) which are stored as compressed `.bz2` archives inside the GitHub repository. + +1. **Add** the library as a dependency in your Rust project. You can do this in two ways: -The archive filename format is: Aspose.PDF-for-Rust-via-CPP-YY.M.zip, where: - YY = last two digits of the year (e.g., 25 for 2025) - M = month number from 1 to 12 +- **Using the command line:** -1. Extract the archive to your chosen directory {path} using a suitable tool: +```bash +cargo add asposepdf --git https://github.com/aspose-pdf/aspose-pdf-rust-cpp.git +``` + +- **Manually editing `Cargo.toml`:** + +Open your project's `Cargo.toml` and add the following under `[dependencies]`: -- On Linux/macOS: bash unzip Aspose.PDF-for-Rust-via-CPP-YY.M.zip -d {path} -- On Windows, use built-in Explorer extraction or any unzip tool (7-Zip, WinRAR). +```toml +[dependencies] +asposepdf = { git = "https://github.com/aspose-pdf/aspose-pdf-rust-cpp.git" } +``` + +**Note:** To use a specific release version, you can specify a tag: + +```toml +asposepdf = { git = "https://github.com/aspose-pdf/aspose-pdf-rust-cpp.git", tag = "v1.25.7" } +``` -2. Add the library as a dependency in your Rust project. You can do this in two ways: +2. **Build** your project (`cargo build`). On the first build, the appropriate dynamic library for your platform will be automatically unpacked from the `.bz2` archive in the `lib` folder. 
This may cause a short delay. -- Using the command line: +**Notes** -```bash +- You do not need to manually download or extract any files — everything is included in the GitHub repository. +- All `.bz2` archives have matching `.sha256` checksum files. The checksum is verified before unpacking. +- If the checksum verification fails or the archive is missing, the build will fail with a detailed error. +- The build script links the appropriate native library and ensures runtime availability using platform-specific options. - cargo add asposepdf --path {path}/asposepdf +### Installation from crates.io + +This package is available on [crates.io](https://crates.io/crates/asposepdf) and includes a build script that automatically extracts the required native library (`.dll`, `.so`, or `.dylib`) from a compressed `.bz2` archive during the build process. + +1. **Add** the library as a dependency in your Rust project. You can do this in two ways: + +- **Using the command line:** + +```bash +cargo add asposepdf ``` -- Manually editing Cargo.toml: Open your project's Cargo.toml and add the following under [dependencies]: toml [dependencies] asposepdf = { path = "{path}/asposepdf" } +- **Manually editing `Cargo.toml`:** + +Open your project's `Cargo.toml` and add the following under `[dependencies]`: + +```toml +[dependencies] +asposepdf = "1.25.7" +``` + +**Note:** The crates.io package requires you to provide the native dynamic libraries yourself (the .dll, .so, .dylib files). + +2. 
**Set the path** to the directory containing the native libraries and download the required files: + +- **Set the environment variable `ASPOSE_PDF_LIB_DIR`** to point to the folder where you will place the native `.bz2` archives, their `.sha256` checksum files, and the extracted native libraries (`.dll`, `.so`, `.dylib`, and for Windows also `.lib`): + +- On Linux/macOS: + +```bash +export ASPOSE_PDF_LIB_DIR=/path/to/lib +``` + +- On Windows (Command Prompt): + +```cmd +set ASPOSE_PDF_LIB_DIR=C:\path\to\lib +``` + +- On Windows (PowerShell): + +```powershell +$env:ASPOSE_PDF_LIB_DIR = "C:\path\to\lib" +``` + +- **Download the required `.bz2` archives** and checksum files from the GitHub repository's [`lib/` folder](https://github.com/aspose-pdf/aspose-pdf-rust-cpp/tree/main/lib) and **place them** into the folder set in `ASPOSE_PDF_LIB_DIR`: + +- For **Linux x64**, download: +- `libAsposePDFforRust_linux_amd64.so.bz2` +- `libAsposePDFforRust_linux_amd64.so.bz2.sha256` + +- For **macOS x86_64**, download: +- `libAsposePDFforRust_darwin_amd64.dylib.bz2` +- `libAsposePDFforRust_darwin_amd64.dylib.bz2.sha256` + +- For **macOS arm64**, download: +- `libAsposePDFforRust_darwin_arm64.dylib.bz2` +- `libAsposePDFforRust_darwin_arm64.dylib.bz2.sha256` + +- For **Windows x64**, download: +- `AsposePDFforRust_windows_amd64.dll.bz2` +- `AsposePDFforRust_windows_amd64.dll.bz2.sha256` +- `AsposePDFforRust_windows_amd64.lib` (native import library, not compressed) + +**Note:** + +You need to manually download these files from GitHub and place them into the directory pointed by `ASPOSE_PDF_LIB_DIR`. +The build script will automatically unpack the native libraries from the `.bz2` archives on first build. -3. Build your project (cargo build). On the first build, the dynamic library for your platform will be unpacked automatically from the .bz2 archive in the lib folder. This may cause a short delay. +3. **Build** your project (`cargo build`). 
On the first build, the native library matching your platform will be automatically extracted and linked. This step may take a few seconds. -## Notes +**Notes** -- The lib folder contains all platform-specific .bz2 archives with corresponding .sha256 checksum files. -- If the checksum file is missing or invalid, the build will fail. -- Update the library by replacing the extracted files with a newer archive version. +- You must provide the folder containing the `.bz2` and `.sha256` files separately, as these binary archives are not distributed via crates.io. +- If the required archive is missing or the checksum fails, the build will fail with a detailed error. +- The same binary files used for installation via GitHub or the Aspose website can be reused here. ## Quick Start From d662d6181422a360f927fb0b893fa78bff4c0406 Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Mon, 1 Sep 2025 16:12:52 +0300 Subject: [PATCH 04/65] fix: moved article Working with Headings in PDF into correct location --- .../working-with-headings/_index.md | 2 ++ 1 file changed, 2 insertions(+) rename en/python-net/advanced-operations/{working-with-documents => working-with-text}/working-with-headings/_index.md (99%) diff --git a/en/python-net/advanced-operations/working-with-documents/working-with-headings/_index.md b/en/python-net/advanced-operations/working-with-text/working-with-headings/_index.md similarity index 99% rename from en/python-net/advanced-operations/working-with-documents/working-with-headings/_index.md rename to en/python-net/advanced-operations/working-with-text/working-with-headings/_index.md index 51ade7120..78f8a09d3 100644 --- a/en/python-net/advanced-operations/working-with-documents/working-with-headings/_index.md +++ b/en/python-net/advanced-operations/working-with-text/working-with-headings/_index.md @@ -26,10 +26,12 @@ Headings are the important parts of any document. 
Writers always try to make hea |NumeralsRomanLowercase|Roman lower type, for example, i,i.ii, ...| |LettersUppercase|English upper type, for example, A,A.B, ...| |LettersLowercase|English lower type, for example, a,a.b, ...| + The [style](https://reference.aspose.com/pdf/python-net/aspose.pdf/heading/#properties) property of [Heading](https://reference.aspose.com/pdf/python-net/aspose.pdf/heading/) class is used to set the numbering styles of the headings. |**Figure: Pre-defined numbering styles**| | :- | + The source code, to obtain the output shown in the above figure, is given below in the example. ```python From 39b8cce84194381c74e544677d3fbc16166cd665 Mon Sep 17 00:00:00 2001 From: "Anastasiia.Holub" Date: Tue, 2 Sep 2025 16:09:31 +0300 Subject: [PATCH 05/65] Refactor AcroForm examples for Python --- .../working-with-forms/acroforms/_index.md | 4 +- .../acroforms/export-import-json/_index.md | 215 ++++++++--- .../acroforms/extract-form/_index.md | 24 +- .../acroforms/fill-form/_index.md | 32 +- .../acroforms/modifing-form/_index.md | 348 ++++++------------ .../acroforms/posting-acroform/_index.md | 190 ---------- .../acroforms/remove-form/_index.md | 231 +++--------- 7 files changed, 349 insertions(+), 695 deletions(-) delete mode 100644 en/python-net/advanced-operations/working-with-forms/acroforms/posting-acroform/_index.md diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/_index.md index 9619677ff..dd7406ec2 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/_index.md @@ -29,4 +29,6 @@ For more detailed learning of the capabilities of the Java library, see the foll - [Create AcroForm](/pdf/python-net/create-form) - create form from scratch with Python. - [Fill AcroForm](/pdf/python-net/fill-form) - fill form field in your PDF document. 
- [Extract AcroForm](/pdf/python-net/extract-form) - get value from all or an individual field of PDF document. - +- [Import and Export Form Data](/pdf/python-net/import-export-json/) - how to import and export form data. +- [Modifying AcroForm](/pdf/python-net/modifing-form/) - modify AcroForm PDF form fields. +- [Delete Forms from PDF](/pdf/python-net/remove-form/) - remove forms from a PDF. \ No newline at end of file diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md index c6db1689d..ae78e0987 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md @@ -6,8 +6,8 @@ url: /python-net/import-export-json/ description: This section explains how to import and Export Form Data. lastmod: "2025-08-05" TechArticle: true -AlternativeHeadline: -Abstract: +AlternativeHeadline: Import and Export techniques using Aspose.PDF for Python via .NET. +Abstract: This compilation presents a comprehensive suite of form data import and export techniques using Aspose.PDF for Python via .NET. It covers workflows for integrating PDF forms with external data formats including XML, FDF, XFDF, and JSON. Users can automate bulk form filling by importing structured data into interactive fields, or extract field values for analysis, backup, or integration with other systems. The examples demonstrate how to bind PDF forms, transfer data between formats, and save updated documents, enabling scalable and consistent document processing across diverse applications. --- ## Import Form Data from an XML file @@ -16,7 +16,7 @@ Next example demonstrates how to import form data from an XML file into an exist Use the following steps: -1. Create Form Object. +1. Create a Form object using Aspose.PDF. 1. 
Bind the PDF Form. 1. Load XML Data. 1. Import XML into PDF. @@ -54,7 +54,7 @@ Python library shows how to export form field data from a PDF document to an XML Use the following steps: -1. Create Form Object +1. Create a Form object using Aspose.PDF. 1. Bind the PDF Form 1. Export Form Data 1. Save Field Values to XML @@ -90,7 +90,7 @@ The 'import_data_from_fdf' method imports form field data from an FDF (Forms Dat Use the following steps: -1. Create Form Object. +1. Create a Form object using Aspose.PDF. 1. Bind the input PDF. 1. Import the form data with import_fdf(). 1. Save the PDF with the imported data to the specified output file path. @@ -114,85 +114,182 @@ Use the following steps: form.save(path_outfile) ``` +## Export Form field Data to FDF +This example demonstrates how to export form data from a PDF document into an FDF (Forms Data Format) file using Aspose.PDF for Python via .NET. +1. Create a Form object using Aspose.PDF. +1. Bind the PDF document. +1. Export form data with export_fdf(). +```python + from io import FileIO, StringIO + import json + from os import path + import aspose.pdf as ap + path_infile = path.join(self.workdir_path, infile) + path_outfile = path.join(self.workdir_path, outfile) + # Create Form object + form = ap.facades.Form() + # Bind PDF document + form.bind_pdf(path_infile) + # Export form data to an FDF file + with FileIO(path_outfile, "w") as f: + form.export_fdf(f) +``` +## Import Form field Data from an XFDF +This example imports form field data from an XFDF (XML Forms Data Format) file into a PDF document and then saves the updated PDF using Aspose.PDF for Python via .NET. +1. Create a Form object using Aspose.PDF. +1. Bind the PDF document to the form. +1. Import form data from an XFDF file. +1. Save the processed form. 
+```python + from io import FileIO, StringIO + import json + from os import path + import aspose.pdf as ap + path_infile = path.join(self.workdir_path, infile) + path_datafile = path.join(self.workdir_path, datafile) + path_outfile = path.join(self.workdir_path, outfile) + # Create Form object + form = ap.facades.Form() + # Bind PDF document + form.bind_pdf(path_infile) + + # Import form data from an XFDF file + with FileIO(path_datafile, "r") as f: + form.import_xfdf(f) + form.save(path_outfile) +``` + +## Export Form field Data to XFDF +This code extracts form field data from a PDF document and exports it into an XFDF (XML Forms Data Format) file using Aspose.PDF for Python. -## Error handling +1. Create a Form object using Aspose.PDF. +1. Bind the PDF document to the form. +1. Export form data to an XFDF file. -This example demonstrates how to export form fields from a PDF to a JSON file using Aspose.PDF, including handling different statuses for each field during the export process. +```python -Let's break down this Aspose.PDF example step by step: + from io import FileIO, StringIO + import json + from os import path + import aspose.pdf as ap -1. Loading the Document. We load a PDF document named "Sample.pdf" from a specified directory. -1. Setting Export Options. Here, we create an instance of ExportFieldsToJsonOptions with two settings: - * `ExportPasswordValue` : This includes password fields in the export. - * `WriteIndented` : This formats the JSON output to be indented for readability -1. Export form fields to JSON. We export the form fields from the PDF file to a JSON file called "export.json" using the specified parameters. -1. 
Processing Export Results: - This loop iterates through the export results and prints the status of each field: + path_infile = path.join(self.workdir_path, infile) + path_outfile = path.join(self.workdir_path, outfile) + + # Create Form object + form = ap.facades.Form() + + # Bind PDF document + form.bind_pdf(path_infile) + + # Export form data to an XFDF file + with FileIO(path_outfile, "w") as f: + form.export_xfdf(f) +``` - * Success: Indicates the field was successfully exported. - * Warning: Prints any warning messages related to the field. - * Error: Prints any error messages related to the field. +## Import Data from another PDF +This code snippet demonstrates how to transfer form field data from a source PDF to a destination PDF. The form data is exported to an XFDF stream from the source PDF and then imported into the target PDF using Aspose.PDF for Python via .NET. -```cs -// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET -private static void ExportFieldsToJsonWithOptions() -{ - // The path to the documents directory - var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); +1. Create Form objects for the source and destination PDFs using Aspose.PDF. +1. Bind each PDF document to its form. +1. Export form data from the source PDF. +1. Import form data into the destination PDF. +1. Save the updated destination PDF. 
- // Open PDF document - using (var document = new Aspose.Pdf.Document(Path.Combine(dataDir, "Forms", "Sample.pdf"))) - { - // Create ExportFieldsToJsonOptions with specific settings - var options = new Aspose.Pdf.ExportFieldsToJsonOptions - { - ExportPasswordValue = true, - WriteIndented = true, - }; +```python + + from io import FileIO, StringIO + import json + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.workdir_path, infile) + path_outfile = path.join(self.workdir_path, outfile) + + # Create Form objects for the source and destination PDFs + form_source = ap.facades.Form() + form_dest = ap.facades.Form() + + # Bind the source and destination PDF documents + form_source.bind_pdf(path_infile) + form_dest.bind_pdf(path_outfile) + + # Export form data from the source to an in-memory XFDF stream, then import it into the destination + with StringIO() as f: + form_source.export_xfdf(f) + form_dest.import_xfdf(f) + form_dest.save() +``` + +## Extract Form Fields to Json + +This method extracts form fields from an input file and exports them to a JSON file. + +1. Create a Form object using Aspose.PDF. +1. Open the output file in write mode using FileIO(). +1. Export form fields to the JSON file using the form.export_json() method. 
+ +```python + + from io import FileIO, StringIO + import json + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.workdir_path, infile) + path_outfile = path.join(self.workdir_path, outfile) - var exportResults = document.Form.ExportToJson(File.OpenWrite("export.json"), options); + form = ap.facades.Form(path_infile) + form_data = {} + # Get values from all fields + for formField in form.field_names: + # Analyze names and values if needed + form_data[formField] = form.get_field(formField) - foreach (var result in exportResults) - { - Console.Write($"{result.FieldFullName} "); - switch (result.FieldSerializationStatus) - { - case Aspose.Pdf.FieldSerializationStatus.Success: - Console.WriteLine("Success"); - break; - case Aspose.Pdf.FieldSerializationStatus.Warning: - foreach (var messages in result.WarningMessages) - { - Console.WriteLine(messages); - } - break; - case Aspose.Pdf.FieldSerializationStatus.Error: - foreach (var messages in result.ErrorMessages) - { - Console.WriteLine(messages); - } - break; - } - } - } -} -``` + # Serialize to JSON + json_string = json.dumps(form_data, indent=4) + with open(path_outfile, "w", encoding="utf-8") as json_file: + json_file.write(json_string) +``` \ No newline at end of file diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/extract-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/extract-form/_index.md index eb8042442..9950dc920 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/extract-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/extract-form/_index.md @@ -26,13 +26,21 @@ The following Python code snippets show how to get the values of all the fields import aspose.pdf as ap - # Open document - pdfDocument = ap.Document(input_file) - - # Get values from all fields - for formField in pdfDocument.form.fields: - # Analyze names and values if need - print("Field Name : " + 
formField.partial_name) - print("Value : " + str(formField.value)) + # Construct the full path to the input PDF file + path_infile = dataDir + infile + + # Create a Form object from the PDF file + form = ap.facades.Form(path_infile) + + # Initialize an empty dictionary to store form values + form_values = {} + + # Iterate through all form fields in the PDF + for formField in form.field_names: + # Retrieve the value for each form field and store in the dictionary + form_values[formField] = form.get_field(formField) + + # Print and return the extracted form values + print(form_values) ``` diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/fill-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/fill-form/_index.md index 252c86e08..c17684da8 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/fill-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/fill-form/_index.md @@ -16,23 +16,33 @@ Abstract: The article provides a guide on how to fill a form field in a PDF docu ## Fill Form Field in a PDF Document -To fill a form field, get the field from the Document object's [Form](https://reference.aspose.com/pdf/python-net/aspose.pdf.forms/form/) collection. then set the field value using the field's [value](https://reference.aspose.com/pdf/python-net/aspose.pdf.forms/form/#properties) property. - -This example selects a [TextBoxField](https://reference.aspose.com/pdf/python-net/aspose.pdf.forms/textboxfield/) and sets its value using the Value property. +The next example fills PDF form fields with new values using Aspose.PDF for Python via .NET and saves the updated document. Supports updating multiple fields by specifying a dictionary of field names and values. 
```python import aspose.pdf as ap - # Open document - pdfDocument = ap.Document(input_file) - for formField in pdfDocument.form.fields: - if formField.partial_name == "Field 1": - # Modify field value - formField.value = "777" + path_infile = self.dataDir + infile + path_outfile = self.dataDir + outfile + + # Define the new field values + new_field_values = { + "First Name": "Alexander_New", + "Last Name": "Greenfield_New", + "City": "Yellowtown_New", + "Country": "Redland_New" + } + + # Create a Form object from the input PDF file + form = ap.facades.Form(path_infile) + + # Fill out the form fields with the new values + for formField in form.field_names: + if formField in new_field_values: + form.fill_field(formField, new_field_values[formField]) - # Save updated document - pdfDocument.save(output_pdf) + # Save the modified form to the output PDF file + form.save(path_outfile) ``` diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md index 049dc15ec..16b8522a8 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md @@ -2,268 +2,142 @@ title: Modifing AcroForm linktitle: Modifing AcroForm type: docs -ai_search_scope: pdf_net -ai_search_endpoint: https://docsearch.api.aspose.cloud/ask -weight: 40 -url: /net/modifing-form/ -description: Modifing form in your PDF file with Aspose.PDF for .NET library. You can Add or remove fields in existing form, getand set fieldlimit and etc. -lastmod: "2022-02-17" +weight: 45 +url: /python-net/modifing-form/ +description: Modifying forms in your PDF file with the Aspose.PDF for Python via .NET library. You can add or remove fields in an existing form, get and set field limits, etc. 
+lastmod: "2025-02-17" sitemap: changefreq: "weekly" priority: 0.7 +TechArticle: true +AlternativeHeadline: Managing and Customizing PDF Form Fields +Abstract: This collection of examples showcases various techniques for managing and customizing PDF form fields using Aspose.PDF for Python via .NET. It includes methods for clearing text from Typewriter-style form fields using TextFragmentAbsorber, setting and retrieving character limits with FormEditor, applying custom fonts and styling to text box fields, and removing specific form fields by name. These operations enable developers to sanitize, format, and tailor PDF forms for redistribution, branding, or data validation purposes, supporting both aesthetic and functional refinement in automated document workflows. --- - - -The following code snippet also work with [Aspose.PDF.Drawing](/pdf/net/drawing/) library. -## Get or Set Field Limit +## Clear Text in Form + +This example demonstrates how to clear text from Typewriter form fields in a PDF using Aspose.PDF for Python via .NET. It scans the first page of the PDF, identifies Typewriter forms, removes their content, and saves the updated document. This approach is useful for resetting or sanitizing form fields before redistributing a PDF. + +1. Load the input PDF document. +1. Access forms on the first page. +1. Iterate over each form and check if it is a “Typewriter” form. +1. Use TextFragmentAbsorber to find text fragments in the form. +1. Clear the text of each fragment. +1. Save the modified PDF to the output file. + +```python -The FormEditor class SetFieldLimit(field, limit) method allows you to set a field limit, the maximum number of characters that can be entered into a field. 
+ import aspose.pdf as ap + from aspose.pycore import cast, is_assignable -```csharp -// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET -private static void SetFieldLimit() -{ - // The path to the documents directory - var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); + path_infile = self.dataDir + infile + path_outfile = self.dataDir + outfile + document = ap.Document(path_infile) - // Create FormEditor instance - using (var form = new Aspose.Pdf.Facades.FormEditor()) - { - // Bind PDF document - form.BindPdf(dataDir + "input.pdf"); + # Get the forms from the first page + forms = document.pages[1].resources.forms - // Set field limit for "textbox1" - form.SetFieldLimit("textbox1", 15); + for form in forms: + # Check if the form is of type "Typewriter" and subtype "Form" + if (form.it == "Typewriter" and form.subtype == "Form"): + # Create a TextFragmentAbsorber to find text fragments + absorber = ap.text.TextFragmentAbsorber() + absorber.visit(form) - // Save PDF document - form.Save(dataDir + "SetFieldLimit_out.pdf"); - } -} + # Clear the text in each fragment + for fragment in absorber.text_fragments: + fragment.text = "" + + # Save PDF document + document.save(path_outfile) ``` -Similarly, Aspose.PDF has a method that gets the field limit using the DOM approach. The following code snippet shows the steps. 
- -```csharp -// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET -private static void GetFieldLimitUsingDOM() -{ - // The path to the documents directory - var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); - - // Open PDF document - using (var document = new Aspose.Pdf.Document(dataDir + "FieldLimit.pdf")) - { - // Get the field and its maximum limit - if (document.Form["textbox1"] is Aspose.Pdf.Forms.TextBoxField textBoxField) - { - Console.WriteLine("Limit: " + textBoxField.MaxLen); - } - } -} +## Get or Set Field Limit + +The FormEditor class set_field_limit(field, limit) method allows you to set a field limit, the maximum number of characters that can be entered into a field. + +```python + + import aspose.pdf as ap + from aspose.pycore import cast, is_assignable + + # The path to the documents directory + path_infile = self.dataDir + infile + path_outfile = self.dataDir + outfile + + # Create FormEditor instance + form = ap.facades.FormEditor() + + # Bind PDF document + form.bind_pdf(path_infile) + + # Set field limit for "First Name" + form.set_field_limit("First Name", 15) + + # Save PDF document + form.save(path_outfile) ``` -You can also get the same value using the *Aspose.Pdf.Facades* namespace using the following code snippet. - -```csharp -// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET -private static void GetFieldLimitUsingFacades() -{ - // The path to the documents directory - var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); - - // Create Form instance - using (var form = new Aspose.Pdf.Facades.Form()) - { - // Bind PDF document - form.BindPdf(dataDir + "FieldLimit.pdf"); - - // Get the field limit for "textbox1" - Console.WriteLine("Limit: " + form.GetFieldLimit("textbox1")); - } -} +Similarly, Aspose.PDF has a method that gets the field limit. 
+ +```python + + import aspose.pdf as ap + from aspose.pycore import cast, is_assignable + + # The path to the documents directory + path_infile = self.dataDir + infile + + document = ap.Document(path_infile) + if is_assignable(document.form[1], ap.forms.TextBoxField): + textBoxField = cast(ap.forms.TextBoxField, document.form[1]) + print(f"Limit: {textBoxField.max_len}") ``` ## Set Custom Font for the Form Field -Form fields in Adobe PDF files can be configured to use specific default fonts. In the early versions of Aspose.PDF, only the 14 default fonts were supported. Later releases allowed developers to apply any font. To set and update the default font used for form fields, use the DefaultAppearance(Font font, double size, Color color) class. This class can be found under the Aspose.Pdf.InteractiveFeatures namespace. To use this object, use the Field class DefaultAppearance property. +Form fields in Adobe PDF files can be configured to use specific default fonts. In the early versions of Aspose.PDF, only the 14 default fonts were supported. Later releases allowed developers to apply any font. + +This code updates the appearance of a text box field in a PDF form by setting a custom font, size, and color, and then saves the modified document using Aspose.PDF for Python via .NET. The following code snippet shows how to set the default font for PDF form fields. 
-```csharp -// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET -private static void SetFormFieldFont() -{ - // The path to the documents directory - var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); - - // Open PDF document - using (var document = new Aspose.Pdf.Document(dataDir + "FormFieldFont14.pdf")) - { - // Get particular form field from document - if (document.Form["textbox1"] is Aspose.Pdf.Forms.Field field) - { - // Create font object - var font = Aspose.Pdf.Text.FontRepository.FindFont("ComicSansMS"); - - // Set the font information for form field - field.DefaultAppearance = new Aspose.Pdf.Annotations.DefaultAppearance(font, 10, System.Drawing.Color.Black); - } - - // Save PDF document - document.Save(dataDir + "FormFieldFont14_out.pdf"); - } -} +```python + + import aspose.pdf as ap + from aspose.pycore import cast, is_assignable + + path_infile = self.dataDir + infile + path_outfile = self.dataDir + outfile + + document = ap.Document(path_infile) + if is_assignable(document.form[1], ap.forms.TextBoxField): + textBoxField = cast(ap.forms.TextBoxField, document.form[1]) + font = ap.text.FontRepository.find_font("Calibri") + textBoxField.default_appearance = ap.annotations.DefaultAppearance(font, 10, ap.Color.black.to_rgb()) + + document.save(path_outfile) ``` -## Add/remove fields in existing form +## Remove fields in existing form -All the form fields are contained in the Document object's Form collection. This collection provides different methods that manage form fields, including the Delete method. If you want to delete a particular field, pass the field name as a parameter to the Delete method and then save the updated PDF document. The following code snippet shows how to delete a particular field from a PDF document. +This code removes a specific form field (by its name) from a PDF document and saves the updated file using Aspose.PDF for Python via .NET. 
-```csharp -// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET -private static void DeleteFormField() -{ - // The path to the documents directory - var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); +1. Load the PDF document. +1. Delete a form field by name. +1. Save the updated PDF. - // Open PDF document - using (var document = new Aspose.Pdf.Document(dataDir + "DeleteFormField.pdf")) - { - // Delete a particular field by name - document.Form.Delete("textbox1"); +```python - // Save PDF document - document.Save(dataDir + "DeleteFormField_out.pdf"); - } -} -``` + import aspose.pdf as ap + from aspose.pycore import cast, is_assignable + + path_infile = self.dataDir + infile + path_outfile = self.dataDir + outfile - + document = ap.Document(path_infile) + # Delete a particular field by name + document.form.delete("First Name") + # Save PDF document + document.save(path_outfile) +``` \ No newline at end of file diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/posting-acroform/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/posting-acroform/_index.md deleted file mode 100644 index a7a428e05..000000000 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/posting-acroform/_index.md +++ /dev/null @@ -1,190 +0,0 @@ ---- -title: Posting AcroForm Data -linktitle: Posting AcroForm -type: docs -ai_search_scope: pdf_net -ai_search_endpoint: https://docsearch.api.aspose.cloud/ask -weight: 50 -url: /net/posting-acroform-data/ -description: You can import and export form data to and XML file with Aspose.Pdf.Facades namespace in Aspose.PDF for .NET. -lastmod: "2022-02-17" -sitemap: - changefreq: "weekly" - priority: 0.7 ---- - - -{{% alert color="primary" %}} - -AcroForm is an important type of the PDF document. 
You can not only create and edit an AcroForm using [Aspose.Pdf.Facades namepsace](https://docs-qa.aspose.com/display/pdftemp/Aspose.Pdf.Facades+namespace), but also import and export form data to and XML file. Aspose.Pdf.Facades namespace in Aspose.PDF for .NET allows you to implement another feature of AcroForm, which helps you post an AcroForm data to an external web page. This web page then reads the post variables and uses this data for further processing. This feature is useful in the cases when you don’t just want to keep the data in the PDF file, rather you want to send it to your server and then save it in a database etc. - -{{% /alert %}} - -## Implementation details - -The following code snippet also work with [Aspose.PDF.Drawing](/pdf/net/drawing/) library. - -In this article, we have tried to create an AcroForm using [Aspose.Pdf.Facades namepsace](https://docs-qa.aspose.com/display/pdftemp/Aspose.Pdf.Facades+namespace). We have also added a submit button and set its target URL. The following code snippets show you how to create the form and then receive the posted data on the web page. 
- -```csharp -// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET -private static void CreateAcroFormWithFields() -{ - // The path to the documents directory - var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); - - // Create an instance of FormEditor class and bind input and output pdf files - using (var editor = new Aspose.Pdf.Facades.FormEditor(dataDir + "input.pdf", dataDir + "output.pdf")) - { - // Create AcroForm fields - I have created only two fields for simplicity - editor.AddField(Aspose.Pdf.Facades.FieldType.Text, "firstname", 1, 100, 600, 200, 625); - editor.AddField(Aspose.Pdf.Facades.FieldType.Text, "lastname", 1, 100, 550, 200, 575); - - // Add a submit button and set target URL - editor.AddSubmitBtn("submitbutton", 1, "Submit", "http://localhost/csharptesting/show.aspx", 100, 450, 150, 475); - - // Save PDF document - editor.Save(); - } -} -``` - -{{% alert color="primary" %}} - -Following code snippet shows you how receive the posted values on the target web page. In this example, I have used a page named Show. a spx, and I have added simple one line code on the page load method. 
- -{{% /alert %}} - -```csharp -// Show the posted values on the target web page -Response.Write("Hello " + Request.Form.Get("firstname") + " " + Request.Form.Get("lastname")); -``` - - diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/remove-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/remove-form/_index.md index 81c753aa6..b658925e8 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/remove-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/remove-form/_index.md @@ -1,211 +1,64 @@ --- -title: Delete Forms from PDF in C# +title: Delete Forms from PDF in Python linktitle: Delete Forms type: docs -ai_search_scope: pdf_net -ai_search_endpoint: https://docsearch.api.aspose.cloud/ask weight: 70 -url: /net/remove-form/ -description: Remove Text based on subtype/form with Aspose.PDF for .NET library. Remove all forms from the PDF. -lastmod: "2024-09-17" +url: /python-net/remove-form/ +description: Remove Text based on subtype/form with Aspose.PDF for Python via .NET library. Remove all forms from the PDF. +lastmod: "2025-09-17" sitemap: changefreq: "weekly" priority: 0.7 +TechArticle: true +AlternativeHeadline: Remove Forms from PDF with Aspose.PDF for Python via .NET +Abstract: This document presents two Python-based approaches for removing form elements from PDF files using Aspose.PDF for Python via .NET. The first method demonstrates how to clear all form objects from a specific page by accessing its resource dictionary and invoking the clear() method on the forms collection. The second method provides a more targeted solution by iterating through form annotations, identifying typewriter-style forms, and selectively deleting them based on their attributes. Both techniques conclude with saving the updated PDF to a specified output path, offering flexible options for form cleanup and document refinement in automated workflows. 
--- - -## Remove Text based on subtype/form - -The next code snippet creates a new Document object using an input variable that contains the path to the PDF file. The example accesses the forms presented on page 2 of the document and checks for forms with certain properties. If a form with the type "Typewriter" and subtype "Form" is found, it uses TextFragmentAbsorber to visit and remove all text fragments in this form. - -```cs -// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET -private static void ClearTextInForm(string input, string output) -{ - // The path to the documents directory - var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); - - // Open PDF document - using (var document = new Aspose.Pdf.Document(dataDir + "TextBox.pdf")) - { - // Get the forms from the first page - var forms = document.Pages[1].Resources.Forms; - - foreach (var form in forms) - { - // Check if the form is of type "Typewriter" and subtype "Form" - if (form.IT == "Typewriter" && form.Subtype == "Form") - { - // Create a TextFragmentAbsorber to find text fragments - var absorber = new Aspose.Pdf.Text.TextFragmentAbsorber(); - absorber.Visit(form); - - // Clear the text in each fragment - foreach (var fragment in absorber.TextFragments) - { - fragment.Text = ""; - } - } - } - - // Save PDF document - document.Save(dataDir + "TextBox_out.pdf"); - } -} -``` - -## Remove Forms with "Typewriter" and a Subtype of "Form" from PDF - -This code snippet searches the forms on the first page of a PDF document for forms with an intent (IT) of "Typewriter" and a subtype (Subtype) of "Form." If both conditions are met, the form is deleted from the PDF. The modified document is then saved to the specified output file. 
- -The Aspose.PDF library provides two ways to remove such forms from PDFs: - -```cs -// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET -private static void DeleteSpecifiedForm(string input, string output) -{ - // The path to the documents directory - var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); - - // Open PDF document - using (var document = new Aspose.Pdf.Document(dataDir + "TextField.pdf")) - { - // Get the forms from the first page - var forms = document.Pages[1].Resources.Forms; - - // Iterate through the forms and delete the ones with type "Typewriter" and subtype "Form" - for (int i = forms.Count; i > 0; i--) - { - if (forms[i].IT == "Typewriter" && forms[i].Subtype == "Form") - { - forms.Delete(forms[i].Name); - } - } - - // Save PDF document - document.Save(dataDir + "TextBox_out.pdf"); - } -} -``` +## Remove all Forms from PDF -Method 2: +This code removes all form elements from the first page of a PDF document and then saves the modified document to the specified output path. -```cs -// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET -private static void DeleteSpecifiedForm(string input, string output) -{ - // The path to the documents directory - var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); +1. Load the PDF document. +1. Access page resources. +1. Clear form objects. +1. Save the updated document. 
- // Open PDF document - using (var document = new Aspose.Pdf.Document(dataDir + "TextField.pdf")) - { - // Get the forms from the first page - var forms = document.Pages[1].Resources.Forms; +```python - // Iterate through the forms and delete the ones with type "Typewriter" and subtype "Form" - foreach (var form in forms) - { - if (form.IT == "Typewriter" && form.Subtype == "Form") - { - var name = forms.GetFormName(form); - forms.Delete(name); - } - } + import aspose.pdf as ap + import os - // Save PDF document - document.Save(dataDir + "TextBox_out.pdf"); - } -} + path_infile = os.path.join(self.dataDir, infile) + path_outfile = os.path.join(self.dataDir, outfile) + document = ap.Document(path_infile) + forms = document.pages[page_num].resources.forms + forms.clear() + document.save(path_outfile) ``` -## Remove all Forms from PDF +## Remove Specified Form -This code removes all form elements from the first page of a PDF document and then saves the modified document to the specified output path. +The next example iterates through the form objects on a given PDF page, identifies typewriter form annotations, deletes them, and then saves the updated PDF using Aspose.PDF for Python via .NET. -```cs -// For complete examples and data files, visit https://github.com/aspose-pdf/Aspose.PDF-for-.NET -private static void RemoveAllForms(string input, string output) -{ - // The path to the documents directory - var dataDir = RunExamples.GetDataDir_AsposePdf_Forms(); +1. Load the PDF document. +1. Access page forms. +1. Iterate over forms. +1. Check for typewriter forms. +1. Delete the matched form. +1. Save the updated document. 
- // Open PDF document - using (var document = new Aspose.Pdf.Document(dataDir + "TextField.pdf")) - { - // Get the forms from the first page - var forms = document.Pages[1].Resources.Forms; +```python - // Clear all forms - forms.Clear(); + import aspose.pdf as ap + import os - // Save PDF document - document.Save(dataDir + "TextBox_out.pdf"); - } -} + path_infile = os.path.join(self.dataDir, infile) + path_outfile = os.path.join(self.dataDir, outfile) + document = ap.Document(path_infile) + forms = document.pages[page_num].resources.forms + for form in forms: + if (form.it == "Typewriter" and form.subtype == "Form"): + name = forms.get_form_name(form) + forms.delete(name) + document.save(path_outfile) ``` \ No newline at end of file From f1f3269db96ea6911e3455bc70de8bd334c37f80 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Tue, 2 Sep 2025 16:25:09 +0300 Subject: [PATCH 06/65] Update en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md Co-authored-by: Andriy Andruhovski --- .../working-with-forms/acroforms/export-import-json/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md index ae78e0987..6e6ba032e 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md @@ -2,7 +2,7 @@ title: Import and Export Form Data type: docs weight: 80 -url: /python-net/import-export-json/ +url: /python-net/import-export-form-data/ description: This section explains how to import and Export Form Data. 
lastmod: "2025-08-05" TechArticle: true From 9ef3a5c94716df53c138c9f3eeea22bb7ebed5f9 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Tue, 2 Sep 2025 16:25:17 +0300 Subject: [PATCH 07/65] Update en/python-net/advanced-operations/working-with-forms/acroforms/_index.md Co-authored-by: Andriy Andruhovski --- .../advanced-operations/working-with-forms/acroforms/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/_index.md index dd7406ec2..a0d24072d 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/_index.md @@ -29,6 +29,6 @@ For more detailed learning of the capabilities of the Java library, see the foll - [Create AcroForm](/pdf/python-net/create-form) - create form from scratch with Python. - [Fill AcroForm](/pdf/python-net/fill-form) - fill form field in your PDF document. - [Extract AcroForm](/pdf/python-net/extract-form) - get value from all or an individual field of PDF document. -- [Import and Export Form Data](/pdf/python-net/import-export-json/) - how to import and Export Form Data. +- [Import and Export Form Data](/pdf/python-net/import-export-form-data/) - how to import and Export Form Data. - [Modifing AcroForm](/pdf/python-net/modifing-form/) - modifing AcroForm PDF form fields. - [Delete Forms from PDF](/pdf/python-net/remove-form/) - remove Forms from PDF. 
\ No newline at end of file From bf7185bb7320435e3d1281ce00b93114700d49a9 Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Thu, 4 Sep 2025 00:54:15 +0300 Subject: [PATCH 08/65] Update en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../working-with-forms/acroforms/modifing-form/_index.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md index 16b8522a8..5763f3bc3 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md @@ -1,10 +1,10 @@ --- -title: Modifing AcroForm -linktitle: Modifing AcroForm +title: Modifying AcroForm +linktitle: Modifying AcroForm type: docs weight: 45 -url: /python-net/modifing-form/ -description: Modifing form in your PDF file with Aspose.PDF for Python via .NET library. You can Add or remove fields in existing form, getand set fieldlimit and etc. +url: /python-net/modifying-form/ +description: Modifying form in your PDF file with Aspose.PDF for Python via .NET library. You can Add or remove fields in existing form, get and set field limit and etc. 
lastmod: "2025-02-17" sitemap: changefreq: "weekly" From f8f998a4afaf2d95a30a753934bdbdbd54febdb4 Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Thu, 4 Sep 2025 00:55:01 +0300 Subject: [PATCH 09/65] Update en/python-net/advanced-operations/working-with-forms/acroforms/_index.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../advanced-operations/working-with-forms/acroforms/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/_index.md index a0d24072d..63d3a57a0 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/_index.md @@ -30,5 +30,5 @@ For more detailed learning of the capabilities of the Java library, see the foll - [Fill AcroForm](/pdf/python-net/fill-form) - fill form field in your PDF document. - [Extract AcroForm](/pdf/python-net/extract-form) - get value from all or an individual field of PDF document. - [Import and Export Form Data](/pdf/python-net/import-export-form-data/) - how to import and Export Form Data. -- [Modifing AcroForm](/pdf/python-net/modifing-form/) - modifing AcroForm PDF form fields. +- [Modifying AcroForm](/pdf/python-net/modifying-form/) - modifying AcroForm PDF form fields. - [Delete Forms from PDF](/pdf/python-net/remove-form/) - remove Forms from PDF. 
\ No newline at end of file From 91006304b3223daec38874d9ab018a1a9cacfb8f Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Thu, 4 Sep 2025 01:09:21 +0300 Subject: [PATCH 10/65] fix: fixed typos --- .../acroforms/{modifing-form => modifying-form}/_index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) rename en/python-net/advanced-operations/working-with-forms/acroforms/{modifing-form => modifying-form}/_index.md (98%) diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md similarity index 98% rename from en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md rename to en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md index 5763f3bc3..2df513755 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/modifing-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md @@ -11,7 +11,7 @@ sitemap: priority: 0.7 TechArticle: true AlternativeHeadline: Managing and Customizing PDF Form Fields -Abstract: This collection of examples showcases various techniques for managing and customizing PDF form fields using Aspose.PDF for Python via .NET. It includes methods for clearing text from Typewriter-style form fields using TextFragmentAbsorber, setting and retrieving character limits with FormEditor, applying custom fonts and styling to text box fields, and removing specific form fields by name. These operations enable developers to sanitize, format, and tailor PDF forms for redistribution, branding, or data validation purposes, supporting both aesthetic and functional refinement in automated document workflows. +Abstract: This collection of examples showcases various techniques for managing and customizing PDF form fields using Aspose.PDF for Python via .NET. 
It includes methods for clearing text from Typewriter-style form fields using TextFragmentAbsorber, setting and retrieving character limits with FormEditor, applying custom fonts and styling to text box fields, and removing specific form fields by name. These operations enable developers to sanitize, format, and tailor PDF forms for redistribution, branding, or data validation purposes, supporting both aesthetic and functional refinement in automated document workflows. --- ## Clear Text in Form @@ -96,7 +96,7 @@ Similarly, Aspose.PDF has a method that gets the field limit. ## Set Custom Font for the Form Field -Form fields in Adobe PDF files can be configured to use specific default fonts. In the early versions of Aspose.PDF, only the 14 default fonts were supported. Later releases allowed developers to apply any font. +Form fields in Adobe PDF files can be configured to use specific default fonts. In the early versions of Aspose.PDF, only the 14 default fonts were supported. Later releases allowed developers to apply any font. This code updates the appearance of a text box field in a PDF form by setting a custom font, size, and color, and then saves the modified document using Aspose.PDF for Python via .NET. @@ -140,4 +140,4 @@ This code removes a specific form field (by its name) from a PDF document and sa document.form.delete("First Name") # Save PDF document document.save(path_outfile) -``` \ No newline at end of file +``` From ace9735a7c617c6dfceae80c31f77b5538e1659b Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Thu, 4 Sep 2025 01:14:03 +0300 Subject: [PATCH 11/65] fix: Rename AcroForms JSON export doc and update sitemap Renamed 'export-import-json' documentation to 'import-export-form-data' for clarity. Updated the sitemap change frequency for the modifying form documentation from weekly to monthly. 
--- .../{export-import-json => import-export-form-data}/_index.md | 0 .../working-with-forms/acroforms/modifying-form/_index.md | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename en/python-net/advanced-operations/working-with-forms/acroforms/{export-import-json => import-export-form-data}/_index.md (100%) diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/import-export-form-data/_index.md similarity index 100% rename from en/python-net/advanced-operations/working-with-forms/acroforms/export-import-json/_index.md rename to en/python-net/advanced-operations/working-with-forms/acroforms/import-export-form-data/_index.md diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md index 2df513755..afdc3921f 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md @@ -7,7 +7,7 @@ url: /python-net/modifying-form/ description: Modifying form in your PDF file with Aspose.PDF for Python via .NET library. You can Add or remove fields in existing form, get and set field limit and etc. 
lastmod: "2025-02-17" sitemap: - changefreq: "weekly" + changefreq: "monthly" priority: 0.7 TechArticle: true AlternativeHeadline: Managing and Customizing PDF Form Fields From 1089acfaed4f1f17fc7e5ee7771220e62648db8b Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Thu, 4 Sep 2025 01:20:25 +0300 Subject: [PATCH 12/65] Fix formatting and capitalization in AcroForm documentation --- .../working-with-forms/acroforms/modifying-form/_index.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md index afdc3921f..27efa3df6 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md @@ -4,7 +4,7 @@ linktitle: Modifying AcroForm type: docs weight: 45 url: /python-net/modifying-form/ -description: Modifying form in your PDF file with Aspose.PDF for Python via .NET library. You can Add or remove fields in existing form, get and set field limit and etc. +description: Modifying form in your PDF file with Aspose.PDF for Python via .NET library. You can add or remove fields in an existing form, get and set field limits, etc. lastmod: "2025-02-17" sitemap: changefreq: "monthly" @@ -20,7 +20,7 @@ This example demonstrates how to clear text from Typewriter form fields in a PDF 1. Load the input PDF document. 1. Access forms on the first page. -1. Iterate over each form and check if it is a “Typewriter” form. +1. Iterate over each form and check if it is a `Typewriter` form. 1. Use TextFragmentAbsorber to find text fragments in the form. 1. Clear the text of each fragment. 1. Save the modified PDF to the output file. 
From 9cb7f3b274ad7e8adaf543147bbb878ac8a29324 Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Thu, 4 Sep 2025 01:21:51 +0300 Subject: [PATCH 13/65] Update en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../working-with-forms/acroforms/modifying-form/_index.md | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md index 27efa3df6..9c65d30eb 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md @@ -30,8 +30,9 @@ This example demonstrates how to clear text from Typewriter form fields in a PDF import aspose.pdf as ap from aspose.pycore import cast, is_assignable - path_infile = self.dataDir + infile - path_outfile = self.dataDir + outfile + dataDir = "path/to/your/data/dir/" + path_infile = dataDir + infile + path_outfile = dataDir + outfile document = ap.Document(path_infile) # Get the forms from the first page From a39ad14c1e1b2235dbea99035662e4f3a9ace4a0 Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Thu, 4 Sep 2025 01:22:06 +0300 Subject: [PATCH 14/65] Update en/python-net/advanced-operations/working-with-forms/acroforms/import-export-form-data/_index.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../acroforms/import-export-form-data/_index.md | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/import-export-form-data/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/import-export-form-data/_index.md index 6e6ba032e..48a8f1ab5 100644 --- 
a/en/python-net/advanced-operations/working-with-forms/acroforms/import-export-form-data/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/import-export-form-data/_index.md @@ -29,10 +29,13 @@ Use the following steps: from os import path import aspose.pdf as ap + # Define the working directory path + workdir_path = "/path/to/working/directory" + # Construct full file paths using the working directory - path_infile = path.join(self.workdir_path, infile) - path_datafile = path.join(self.workdir_path, datafile) - path_outfile = path.join(self.workdir_path, outfile) + path_infile = path.join(workdir_path, infile) + path_datafile = path.join(workdir_path, datafile) + path_outfile = path.join(workdir_path, outfile) # Create a Form object from Aspose.PDF form = ap.facades.Form() From a1017a1b373cb9b526f238713b2baa70ccab39fa Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Thu, 4 Sep 2025 01:22:39 +0300 Subject: [PATCH 15/65] Update en/python-net/advanced-operations/working-with-forms/acroforms/fill-form/_index.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../working-with-forms/acroforms/fill-form/_index.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/fill-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/fill-form/_index.md index c17684da8..e2e94b448 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/fill-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/fill-form/_index.md @@ -22,8 +22,11 @@ The next example fills PDF form fields with new values using Aspose.PDF for Pyth import aspose.pdf as ap - path_infile = self.dataDir + infile - path_outfile = self.dataDir + outfile + dataDir = "/path/to/your/pdf/files/" + infile = "input.pdf" + outfile = "output.pdf" + path_infile = dataDir + infile + path_outfile = dataDir + outfile # 
Define the new field values new_field_values = { From caada0add6afbcee20b36145c9e819271fa5b2aa Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Thu, 4 Sep 2025 01:37:30 +0300 Subject: [PATCH 16/65] refactor: Refactor PDF form file path handling in docs Updated code examples to use explicit 'data_dir' and 'work_dir' variables for constructing input and output PDF file paths, replacing previous usage of 'self.dataDir'. This improves clarity and consistency in file path management across documentation for modifying and removing PDF forms. --- .../acroforms/modifying-form/_index.md | 18 +++++++++++------- .../acroforms/remove-form/_index.md | 14 +++++++++----- 2 files changed, 20 insertions(+), 12 deletions(-) diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md index 9c65d30eb..820a81b30 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/modifying-form/_index.md @@ -63,8 +63,9 @@ The FormEditor class set_field_limit(field, limit) method allows you to set a fi from aspose.pycore import cast, is_assignable # The path to the documents directory - path_infile = self.dataDir + infile - path_outfile = self.dataDir + outfile + data_dir = "/path/to/your/pdf/files/" + path_infile = os.path.join(data_dir, infile) + path_outfile = os.path.join(data_dir, outfile) # Create FormEditor instance form = ap.facades.FormEditor() @@ -87,7 +88,8 @@ Similarly, Aspose.PDF has a method that gets the field limit. 
from aspose.pycore import cast, is_assignable # The path to the documents directory - path_infile = self.dataDir + infile + data_dir = "/path/to/your/pdf/files/" + path_infile = os.path.join(data_dir, infile) document = ap.Document(path_infile) if is_assignable(document.form[1], ap.forms.TextBoxField): @@ -108,8 +110,9 @@ The following code snippet shows how to set the default font for PDF form fields import aspose.pdf as ap from aspose.pycore import cast, is_assignable - path_infile = self.dataDir + infile - path_outfile = self.dataDir + outfile + data_dir = "/path/to/your/pdf/files/" + path_infile = os.path.join(data_dir, infile) + path_outfile = os.path.join(data_dir, outfile) document = ap.Document(path_infile) if is_assignable(document.form[1], ap.forms.TextBoxField): @@ -133,8 +136,9 @@ This code removes a specific form field (by its name) from a PDF document and sa import aspose.pdf as ap from aspose.pycore import cast, is_assignable - path_infile = self.dataDir + infile - path_outfile = self.dataDir + outfile + data_dir = "/path/to/your/pdf/files/" + path_infile = os.path.join(data_dir, infile) + path_outfile = os.path.join(data_dir, outfile) document = ap.Document(path_infile) # Delete a particular field by name diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/remove-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/remove-form/_index.md index b658925e8..bb917aad7 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/remove-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/remove-form/_index.md @@ -11,7 +11,7 @@ sitemap: priority: 0.7 TechArticle: true AlternativeHeadline: Remove Forms from PDF with Aspose.PDF for Python via .NET -Abstract: This document presents two Python-based approaches for removing form elements from PDF files using Aspose.PDF for Python via .NET. 
The first method demonstrates how to clear all form objects from a specific page by accessing its resource dictionary and invoking the clear() method on the forms collection. The second method provides a more targeted solution by iterating through form annotations, identifying typewriter-style forms, and selectively deleting them based on their attributes. Both techniques conclude with saving the updated PDF to a specified output path, offering flexible options for form cleanup and document refinement in automated workflows. +Abstract: This document presents two Python-based approaches for removing form elements from PDF files using Aspose.PDF for Python via .NET. The first method demonstrates how to clear all form objects from a specific page by accessing its resource dictionary and invoking the clear() method on the forms collection. The second method provides a more targeted solution by iterating through form annotations, identifying typewriter-style forms, and selectively deleting them based on their attributes. Both techniques conclude with saving the updated PDF to a specified output path, offering flexible options for form cleanup and document refinement in automated workflows. 
--- ## Remove all Forms from PDF @@ -28,8 +28,10 @@ This code removes all form elements from the first page of a PDF document and th import aspose.pdf as ap import os - path_infile = os.path.join(self.dataDir, infile) - path_outfile = os.path.join(self.dataDir, outfile) + data_dir = "/path/to/your/pdf/files/" + path_infile = os.path.join(data_dir, infile) + path_outfile = os.path.join(data_dir, outfile) + document = ap.Document(path_infile) forms = document.pages[page_num].resources.forms forms.clear() @@ -52,8 +54,10 @@ Next example iterates through the form objects on a given PDF page, identifies t import aspose.pdf as ap import os - path_infile = os.path.join(self.dataDir, infile) - path_outfile = os.path.join(self.dataDir, outfile) + data_dir = "/path/to/your/pdf/files/" + path_infile = os.path.join(work_dir, infile) + path_outfile = os.path.join(work_dir, outfile) + document = ap.Document(path_infile) forms = document.pages[page_num].resources.forms for form in forms: From 4ac073a14d78823b6526b4d630fa4859d739a45e Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Thu, 4 Sep 2025 09:55:32 +0300 Subject: [PATCH 17/65] refactor: Update file path handling in AcroForm examples for consistency --- .../working-with-forms/acroforms/create-form/_index.md | 8 ++++++-- .../working-with-forms/acroforms/extract-form/_index.md | 3 ++- .../working-with-forms/acroforms/fill-form/_index.md | 8 +++----- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/create-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/create-form/_index.md index 8a63fa892..1159edc3b 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/create-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/create-form/_index.md @@ -33,8 +33,12 @@ Below example shows how to add a [TextBoxField](https://reference.aspose.com/pdf import aspose.pdf as ap + 
data_dir = "/path/to/your/pdf/files/" + path_infile = os.path.join(work_dir, infile) + path_outfile = os.path.join(work_dir, outfile) + # Open document - pdfDocument = ap.Document(input_file) + pdfDocument = ap.Document(path_infile) # Create a field textBoxField = ap.forms.TextBoxField(pdfDocument.pages[1], ap.Rectangle(100, 200, 300, 300, True)) @@ -52,6 +56,6 @@ Below example shows how to add a [TextBoxField](https://reference.aspose.com/pdf pdfDocument.form.add(textBoxField, 1) # Save modified PDF - pdfDocument.save(output_pdf) + pdfDocument.save(path_outfile) ``` diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/extract-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/extract-form/_index.md index 9950dc920..253a5a4ab 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/extract-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/extract-form/_index.md @@ -27,7 +27,8 @@ The following Python code snippets show how to get the values of all the fields import aspose.pdf as ap # Construct the full path to the input PDF file - path_infile = dataDir + infile + data_dir = "/path/to/your/pdf/files/" + path_infile = os.path.join(work_dir, infile) # Create a Form object from the PDF file form = ap.facades.Form(path_infile) diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/fill-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/fill-form/_index.md index e2e94b448..9dd5910c3 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/fill-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/fill-form/_index.md @@ -22,11 +22,9 @@ The next example fills PDF form fields with new values using Aspose.PDF for Pyth import aspose.pdf as ap - dataDir = "/path/to/your/pdf/files/" - infile = "input.pdf" - outfile = "output.pdf" - path_infile = dataDir + infile - path_outfile = 
dataDir + outfile + data_dir = "/path/to/your/pdf/files/" + path_infile = os.path.join(work_dir, infile) + path_outfile = os.path.join(work_dir, outfile) # Define the new field values new_field_values = { From 05ff256921db4bbedc2bb453734a9b878f5dc778 Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Thu, 4 Sep 2025 11:02:04 +0300 Subject: [PATCH 18/65] Fix formatting and typos in AcroForms docs Removed trailing whitespace and corrected minor formatting issues in the extract-form and import-export-form-data documentation. Updated section title to 'Extract Form Fields to JSON Document' for clarity. --- .../acroforms/extract-form/_index.md | 1 - .../acroforms/import-export-form-data/_index.md | 14 +++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/extract-form/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/extract-form/_index.md index 253a5a4ab..0b28516b6 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/extract-form/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/extract-form/_index.md @@ -44,4 +44,3 @@ The following Python code snippets show how to get the values of all the fields # Print and return the extracted form values print(form_values) ``` - diff --git a/en/python-net/advanced-operations/working-with-forms/acroforms/import-export-form-data/_index.md b/en/python-net/advanced-operations/working-with-forms/acroforms/import-export-form-data/_index.md index 48a8f1ab5..59c9d8a00 100644 --- a/en/python-net/advanced-operations/working-with-forms/acroforms/import-export-form-data/_index.md +++ b/en/python-net/advanced-operations/working-with-forms/acroforms/import-export-form-data/_index.md @@ -1,5 +1,5 @@ --- -title: Import and Export Form Data +title: Import and Export Form Data type: docs weight: 80 url: /python-net/import-export-form-data/ @@ -39,7 +39,7 @@ Use the following steps: # 
Create a Form object from Aspose.PDF form = ap.facades.Form() - + # Bind the input PDF form form.bind_pdf(path_infile) @@ -68,7 +68,7 @@ Use the following steps: import json from os import path import aspose.pdf as ap - + # Combine the working directory path with the input PDF file name path_infile = path.join(self.workdir_path, infile) @@ -255,7 +255,7 @@ This method extracts form fields from an input file and exports them to a JSON f from io import FileIO, StringIO import json from os import path - import aspose.pdf as ap + import aspose.pdf as ap path_infile = path.join(self.workdir_path, infile) path_outfile = path.join(self.workdir_path, outfile) @@ -265,7 +265,7 @@ This method extracts form fields from an input file and exports them to a JSON f form.export_json(json_file, True) ``` -## Extract Form Fields to Json DOC +## Extract Form Fields to JSON Document 1. Create a Form object from the input file. 1. Initialize an empty dictionary to store form field data. @@ -278,7 +278,7 @@ This method extracts form fields from an input file and exports them to a JSON f from io import FileIO, StringIO import json from os import path - import aspose.pdf as ap + import aspose.pdf as ap path_infile = path.join(self.workdir_path, infile) path_outfile = path.join(self.workdir_path, outfile) @@ -295,4 +295,4 @@ This method extracts form fields from an input file and exports them to a JSON f with open(path_outfile, "w", encoding="utf-8") as json_file: json_file.write(json_string) -``` \ No newline at end of file +``` From 8440c4a79fab4434cb5ca719a005068d2c82ae9c Mon Sep 17 00:00:00 2001 From: "Anastasiia.Holub" Date: Fri, 5 Sep 2025 12:32:04 +0300 Subject: [PATCH 19/65] feat: Add FAQ section for Aspose.PDF for Python via .NET with common questions and answers --- en/python-net/faq/_index.md | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 en/python-net/faq/_index.md diff --git a/en/python-net/faq/_index.md b/en/python-net/faq/_index.md new file mode 
100644 index 000000000..08589ce2f --- /dev/null +++ b/en/python-net/faq/_index.md @@ -0,0 +1,18 @@ +--- +title: FAQ +linktitle: FAQ +type: docs +weight: 140 +url: /python-net/faq/ +description: Here you can find answers to Frequently Asked Questions for Aspose.PDF for Python via .NET library. +lastmod: "2025-09-05" +sitemap: + changefreq: "monthly" + priority: 0.7 +TechArticle: true +AlternativeHeadline: The Frequently Asked Question of Aspose.PDF for Python via .NET library. +Abstract: +--- + +

What formats does Aspose.PDF for Python via .NET support?

Aspose.PDF for Python via .NET supports popular file formats such as PDF, TXT, HTML, PCL, XML, XPS, EPUB, TEX, and image formats. For more details, please visit the page .

Does Aspose.PDF for Python via .NET support AI features?

Yes, the library has built-in OpenAI and Llama API clients. They allow you to make API requests and create AI copilots. You can find examples on page.

How many files can I combine to PDF at once?

You can merge an unlimited number of files into PDF at once.

How to insert an image into a PDF?

To insert an image into a PDF using Aspose.PDF for Python via .NET, you can find .

How to edit the text in PDF?

To edit the text in a PDF using Aspose.PDF for Python via .NET, you can find .

How to add page numbers to a PDF file?

To add page numbers to a PDF using Aspose.PDF for Python via .NET, you can find .

How to create a background for PDF Documents?

To create a background for a PDF document using Aspose.PDF for Python via .NET, you can find .

How to secure a PDF document?

To secure a PDF document using Aspose.PDF for Python via .NET, you can find .

How to add bold text in highlighted annotation on a PDF page?

To add bold text in a highlighted annotation, you can find .

How to validate a tagged PDF?

To validate a tagged PDF document, you can find .

How to implement regex search for TextFragmentAbsorber?

To use regex with the `TextFragmentAbsorber` class in Aspose.PDF for Python via .NET, you can find .

How to make a valid PDF/A document when a font is missing and no substitution is provided?

To create a valid PDF/A document, you can find .

I see errors in PDF/A conversion log. Does it mean that the document wasn't converted successfully?

No, Aspose.PDF logs all problems it encountered, including the ones that were automatically fixed. If all entries in the log are marked as Convertable=True, all problems were fixed, and the document was successfully converted. Only the entries with Convertable=False indicate the conversion failure.

How to make a valid PDF/A document if the conversion log contains "Font not embedded" errors?

If a PDF/A conversion log contains "Font not embedded" error entries marked as Convertable=False, it means that the original document includes fonts that are missing both in the document itself and on the machine where the conversion is performed. Check to learn how to replace missing fonts.

Why doesn't Aspose.PDF convert a document to PDF/A with the ConvertErrorAction.None option?

The ConvertErrorAction.None option prevents Aspose.PDF from removing elements from the document, even if those elements are prohibited by the PDF/A standard. As a result, it may be impossible to convert a document using the ConvertErrorAction.None mode. Use this option when you do not want Aspose.PDF to automatically remove prohibited elements from the document, especially if you plan to review the conversion log and manually fix the document's issues. Another scenario for using this option is when your document is already mostly or fully PDF/A compliant, and you want to avoid unnecessary changes. More permissive PDF/A versions (e.g., PDF/A-2 or PDF/A-3) are more likely to be successfully converted with the ConvertErrorAction.None option. However, for PDF/A-1a and PDF/A-1b standards, it is generally recommended to use the ConvertErrorAction.Delete option.

Does Aspose.PDF for Python via .NET support Linux?

Yes, Aspose.PDF for Python via .NET supports running on Linux environments. You can use the Python via .NET Core version or later, which is cross-platform and can be used on Windows, macOS, and Linux.

Where are your Python via .NET Examples?

You can check all of them on .

+ From be982c5b93331740e0ac1da60f2404f8491f6fa9 Mon Sep 17 00:00:00 2001 From: "Anastasiia.Holub" Date: Fri, 5 Sep 2025 13:24:44 +0300 Subject: [PATCH 20/65] chore: Update documentation for clarity and consistency --- en/python-net/whatsnew/_index.md | 1101 ++++++++++++------------------ 1 file changed, 440 insertions(+), 661 deletions(-) diff --git a/en/python-net/whatsnew/_index.md b/en/python-net/whatsnew/_index.md index 36825d438..8b1ab2b7a 100644 --- a/en/python-net/whatsnew/_index.md +++ b/en/python-net/whatsnew/_index.md @@ -9,11 +9,447 @@ sitemap: changefreq: "monthly" priority: 0.8 lastmod: "2025-02-24" -TechArticle: true -AlternativeHeadline: Release Notes of Aspose.PDF for Python -Abstract: ​The What's New section of Aspose.PDF for Python via .NET provides detailed release notes for each version, highlighting new features, improvements, and bug fixes. Each release note includes code snippets demonstrating the latest functionalities, offering developers practical examples to integrate into their projects. +TechArticle: false --- +## What's new in Aspose.PDF 25.5 + +The 'extract_certificate' method extracts digital certificates from signatures embedded in a PDF document. + +```python + + import aspose.pdf as ap + + def extract_certificate(self, infile): + + path_infile = self.data_dir + infile + + # Open PDF document + with ap.Document(path_infile) as document: + with ap.facades.PdfFileSignature(document) as signature: + # Get signature names + signature_names = signature.get_signature_names(True) + for signature_name in signature_names: + # Extract certificate + certificate = [] + if signature.try_extract_certificate(signature_name, certificate): + print(certificate[0] is not None) +``` + +The 'create_ordered_list' method generates a tagged PDF document with a structured numbered list, including nested sublists. 
+ +```python + + import aspose.pdf as ap + + def create_ordered_list(self, outfile): + + path_outfile = self.data_dir + outfile + + # Create or open PDF document + with ap.Document() as document: + content = document.tagged_content + root_element = content.root_element + content.set_language("en-US") + root_list = content.create_list_element() + span_for_lbl_1 = content.create_span_element() + span_for_lbl_1.set_text("1. ") + position_settings = ap.tagged.PositionSettings() + position_settings.is_in_line_paragraph = True + span_for_lbl_1.adjust_position(position_settings) + span_for_body_1 = content.create_span_element() + span_for_body_1.set_text("bread") + span_for_body_1.adjust_position(position_settings) + lbl_1 = content.create_list_lbl_element() + lbl_1.append_child(span_for_body_1, True) + l_body_1 = content.create_list_l_body_element() + l_body_1.append_child(span_for_lbl_1, True) + li_1 = content.create_list_li_element() + li_1.append_child(lbl_1, True) + li_1.append_child(l_body_1, True) + root_list.append_child(li_1, True) + span_for_lbl_2 = content.create_span_element() + span_for_lbl_2.set_text("2. ") + span_for_body_2 = content.create_span_element() + span_for_body_2.set_text("milk") + span_for_body_2.adjust_position(position_settings) + lbl_2 = content.create_list_lbl_element() + lbl_2.append_child(span_for_lbl_2, True) + l_body_2 = content.create_list_l_body_element() + l_body_2.append_child(span_for_body_2, True) + li_2 = content.create_list_li_element() + li_2.append_child(lbl_2, True) + li_2.append_child(l_body_2, True) + root_list.append_child(li_2, True) + nested_list_depth_1 = content.create_list_element() + span_for_lbl_3_1 = content.create_span_element() + span_for_lbl_3_1.set_text("3.1. 
") + position_settings_lbl_3_1 = ap.tagged.PositionSettings() + position_settings_lbl_3_1.is_in_line_paragraph = False + margin_info = ap.MarginInfo() + margin_info.left = 50 + position_settings_lbl_3_1.margin = margin_info + span_for_lbl_3_1.adjust_position(position_settings_lbl_3_1) + span_for_body_3_1 = content.create_span_element() + span_for_body_3_1.set_text("apples") + span_for_body_3_1.adjust_position(position_settings) + lbl_3_1 = content.create_list_lbl_element() + lbl_3_1.append_child(span_for_lbl_3_1, True) + l_body_3_1 = content.create_list_l_body_element() + l_body_3_1.append_child(span_for_body_3_1, True) + li_3_1 = content.create_list_li_element() + li_3_1.append_child(lbl_3_1, True) + li_3_1.append_child(l_body_3_1, True) + nested_list_depth_1.append_child(li_3_1, True) + span_for_lbl_3_2 = content.create_span_element() + span_for_lbl_3_2.set_text("3.2. ") + span_for_lbl_3_2.adjust_position(position_settings_lbl_3_1) + span_for_body_3_2 = content.create_span_element() + span_for_body_3_2.set_text("banana") + span_for_body_3_2.adjust_position(position_settings) + lbl_3_2 = content.create_list_lbl_element() + lbl_3_2.append_child(span_for_lbl_3_2, True) + l_body_3_2 = content.create_list_l_body_element() + l_body_3_2.append_child(span_for_body_3_2, True) + li_3_2 = content.create_list_li_element() + li_3_2.append_child(lbl_3_2, True) + li_3_2.append_child(l_body_3_2, True) + nested_list_depth_1.append_child(li_3_2, True) + span_for_lbl_3 = content.create_span_element() + span_for_lbl_3.set_text("3. 
") + span_for_body_3 = content.create_span_element() + span_for_body_3.set_text("fruits") + span_for_body_3.adjust_position(position_settings) + lbl_3 = content.create_list_lbl_element() + lbl_3.append_child(span_for_lbl_3, True) + l_body_3 = content.create_list_l_body_element() + l_body_3.append_child(span_for_body_3, True) + li_3 = content.create_list_li_element() + li_3.append_child(lbl_3, True) + li_3.append_child(l_body_3, True) + l_body_3.append_child(nested_list_depth_1, True) + root_list.append_child(li_3, True) + root_element.append_child(root_list, True) + # Save Tagged PDF Document + document.save(path_outfile) +``` + +This example verifies a digital signature in a PDF document using a public key certificate. + +```python + + import aspose.pdf as ap + + def verify_with_public_key_certificate1(self, certificate, infile): + + path_infile = self.data_dir + infile + + # Create an instance of PdfFileSignature for working with signatures in the document + with ap.facades.PdfFileSignature(path_infile) as file_sign: + # Get a list of signatures + signature_names = file_sign.get_signature_names(True) + # Verify the signature with the given name. + return file_sign.verify_signature(signature_names[0], certificate) +``` + +Next feature converts a dynamic XFA (XML Forms Architecture) PDF form into a standard AcroForm PDF. 
+ +```python + + import aspose.pdf as ap + + def convert_xfa_form_with_ignore_needs_rendering(self, infile, outfile): + + path_infile = self.data_dir + infile + path_outfile = self.data_dir + outfile + + # Load dynamic XFA form + with ap.Document(path_infile) as document: + # check if XFA is present & if rendering should be overwritten + if not document.form.needs_rendering and document.form.has_xfa: + document.form.ignore_needs_rendering = True + # Set the form fields type as standard AcroForm + document.form.type = ap.forms.FormType.STANDARD + # Save the resultant PDF + document.save(path_outfile) +``` + +This code snippet converts a PDF document to XPS format while replacing missing or unavailable fonts with a specified default font. + +```python + + import aspose.pdf as ap + + def replace_font_when_converting_pdf_to_xps(self, infile, outfile): + + path_infile = self.data_dir + infile + path_outfile = self.data_dir + outfile + + # Create XpsSaveOptions instance + xps_save_options = ap.XpsSaveOptions() + # use_embedded_true_type_fonts option specifies whether to use embedded TrueType fonts + xps_save_options.use_embedded_true_type_fonts = False + # The specified default font will be used if the embedded font name cannot be found in the system + xps_save_options.default_font = "Courier New" + # Open PDF document + doc = ap.Document(path_infile) + # Save the resultant XPS + doc.save(path_outfile, xps_save_options) +``` + +## What's new in Aspose.PDF 25.4 + +This version converts an existing PDF document into PDF/A-1b format. 
+ +```python + + import aspose.pdf as ap + + def convert_to_pdfa_with_automatic_tagging(self, infile, outfile, outlogfile): + + path_infile = self.data_dir + infile + path_outfile = self.data_dir + outfile + path_outlogfile = self.data_dir + outlogfile + + # Open PDF document + with ap.Document(path_infile) as document: + # Create conversion options + options = ap.PdfFormatConversionOptions(path_outlogfile, ap.PdfFormat.PDF_A_1B, ap.ConvertErrorAction.DELETE) + # Create auto-tagging settings + # aspose.pdf.AutoTaggingSettings.default may be used to set the same settings as given below + auto_tagging_settings = ap.AutoTaggingSettings() + # Enable auto-tagging during the conversion process + auto_tagging_settings.enable_auto_tagging = True + # Use the heading recognition strategy that's optimal for the given document structure + auto_tagging_settings.heading_recognition_strategy = ap.HeadingRecognitionStrategy.AUTO + # Assign auto-tagging settings to be used during the conversion process + options.auto_tagging_settings = auto_tagging_settings + # During the conversion, the document logical structure will be automatically created + document.convert(options) + # Save PDF document + document.save(path_outfile) + +``` + +## What's new in Aspose.PDF 25.3 + +From 25.3 Aspose.PDF for Python via .NET library supports verifying whether the digital signatures in a PDF document have been compromised. 
+ +```python + + import aspose.pdf as ap + + def check(self, infile): + + path_infile = self.data_dir + infile + + # Open PDF document + with ap.Document(path_infile) as document: + # Create the compromise detector instance + detector = ap.SignaturesCompromiseDetector(document) + results = [] + # Check for compromise + if detector.check(results): + print("No signature compromise detected") + return + # Get information about compromised signatures + result = results[0] + if result.has_compromised_signatures: + print(f"Count of compromised signatures: {result.COMPROMISED_SIGNATURES.length}") + for signature_name in result.COMPROMISED_SIGNATURES: + print(f"Signature name: {signature_name.FULL_NAME}") + # Get info about signatures coverage + print(result.signatures_coverage) + +``` + +Also supports adjusting the Table position in PDF. + +```python + + import aspose.pdf as ap + + def adjust_table_position(self, outfile, outlogfile): + + path_outfile = self.data_dir + outfile + path_outlogfile = self.data_dir + outlogfile + + # Create PDF document + with ap.Document() as document: + # Create tagged content + tagged_content = document.tagged_content + tagged_content.set_title("Example table cell style") + tagged_content.set_language("en-US") + # Get root structure element + root_element = tagged_content.root_element + # Create table structure element + table_element = tagged_content.create_table_element() + root_element.append_child(table_element, True) + # Create position settings + position_settings = ap.tagged.PositionSettings() + position_settings.horizontal_alignment = ap.HorizontalAlignment.NONE + margin_info = ap.MarginInfo() + margin_info.left = 20 + margin_info.right = 0 + margin_info.top = 0 + margin_info.bottom = 0 + position_settings.margin = margin_info + position_settings.vertical_alignment = ap.VerticalAlignment.NONE + position_settings.is_first_paragraph_in_column = False + position_settings.is_kept_with_next = False + position_settings.is_in_new_page = 
False + position_settings.is_in_line_paragraph = False + # Adjust table position + table_element.adjust_position(position_settings) + table_t_head_element = table_element.create_t_head() + table_t_body_element = table_element.create_t_body() + table_t_foot_element = table_element.create_t_foot() + row_count = 4 + col_count = 4 + head_tr_element = table_t_head_element.create_tr() + head_tr_element.alternative_text = "Head Row" + for col_index in range(col_count): + th_element = head_tr_element.create_th() + th_element.set_text(f"Head {col_index}") + th_element.background_color = ap.Color.green_yellow + th_element.border = ap.BorderInfo(ap.BorderSide.ALL, 4.0, ap.Color.gray) + th_element.is_no_border = True + th_element.margin = ap.MarginInfo(16.0, 2.0, 8.0, 2.0) + th_element.alignment = ap.HorizontalAlignment.RIGHT + for row_index in range(row_count): + tr_element = table_t_body_element.create_tr() + tr_element.alternative_text = f"Row {row_index}" + for col_index in range(col_count): + col_span = 1 + row_span = 1 + if col_index == 1 and row_index == 1: + col_span = 2 + row_span = 2 + elif col_index == 2 and (row_index == 1 or row_index == 2): + continue + elif row_index == 2 and (col_index == 1 or col_index == 2): + continue + td_element = tr_element.create_td() + td_element.set_text(f"Cell [{row_index}, {col_index}]") + td_element.background_color = ap.Color.yellow + td_element.border = ap.BorderInfo(ap.BorderSide.ALL, 4.0, ap.Color.gray) + td_element.is_no_border = False + td_element.margin = ap.MarginInfo(8.0, 2.0, 8.0, 2.0) + td_element.alignment = ap.HorizontalAlignment.CENTER + cell_text_state = ap.text.TextState() + cell_text_state.foreground_color = ap.Color.dark_blue + cell_text_state.font_size = 7.5 + cell_text_state.font_style = ap.text.FontStyles.BOLD + cell_text_state.font = ap.text.FontRepository.find_font("Arial") + td_element.default_cell_text_state = cell_text_state + td_element.is_word_wrapped = True + td_element.vertical_alignment = 
ap.VerticalAlignment.CENTER + td_element.col_span = col_span + td_element.row_span = row_span + foot_tr_element = table_t_foot_element.create_tr() + foot_tr_element.alternative_text = "Foot Row" + + for col_index in range(col_count): + td_element = foot_tr_element.create_td() + td_element.set_text(f"Foot {col_index}") + # Save Tagged PDF Document + document.save(path_outfile) + # Check PDF/UA compliance + with ap.Document(path_outfile) as document: + # Create tagged content + is_pdf_ua_compliance = document.validate(path_outlogfile, ap.PdfFormat.PDF_UA_1) + print(f"PDF/UA compliance: {is_pdf_ua_compliance}") +``` + +## What's new in Aspose.PDF 25.2 + +From 25.2 Aspose.PDF for Python via .NET library supports PDF to PDF/X conversion. + +```python + + import aspose.pdf as ap + + def convert_pdf_to_pdf_x(self, infile, infile_icc, outfile): + + path_infile = self.data_dir + infile + path_infile_icc = self.data_dir + infile_icc + path_outfile = self.data_dir + outfile + + # Open PDF document + with ap.Document(path_infile) as document: + # Set up the desired PDF/X format with PdfFormatConversionOptions + options = ap.PdfFormatConversionOptions(ap.PdfFormat.PDF_X_4, ap.ConvertErrorAction.DELETE) + # Provide the name of the external ICC profile file (optional) + options.icc_profile_file_name = path_infile_icc + # Provide an output condition identifier and other necessary OutputIntent properties (optional) + options.output_intent = ap.OutputIntent("FOGRA39") + # Convert to PDF/X compliant document + document.convert(options) + # Save PDF document + document.save(path_outfile) +``` + +How to get information about Digital Signatures of PDF? 
+ +```python + + import aspose.pdf as ap + + def verify(self, infile): + + path_infile = self.data_dir + infile + + # Open the document + with ap.Document(path_infile) as document: + # Create an instance of PdfFileSignature for working with signatures in the document + with ap.facades.PdfFileSignature(document) as signature: + # Get a list of signature names in the document + signature_names = signature.get_signature_names(True) + # Loop through all signature names to verify each one + for signature_name in signature_names: + # Verify that the signature with the given name is valid + if signature.verify_signature(signature_name) is False: + raise Exception('Not verified') + +``` + +With this version it is now possible to create a TextBoxField with multiple widget annotations. + +```python + + import aspose.pdf as ap + + def add_text_box_field_to_pdf(self, outfile): + + path_outfile = self.data_dir + outfile + + # Create PDF document + with ap.Document() as document: + # Add a new page in the created document + page = document.pages.add() + # Defining an array with rectangle data for widget annotations. + # The number of elements in the array determines the number of widget annotations to add. + rects = [ap.Rectangle(10, 600, 110, 620, True), ap.Rectangle(10, 630, 110, 650, True), + ap.Rectangle(10, 660, 110, 680, True)] + # Defining an array with DefaultAppearance used to specify how widget annotations are displayed in the added field. 
+ default_appearances = [ap.annotations.DefaultAppearance("Arial", 10, drawing.Color.dark_blue), + ap.annotations.DefaultAppearance("Helvetica", 12, drawing.Color.dark_green), + ap.annotations.DefaultAppearance(ap.text.FontRepository.find_font("TimesNewRoman"), + 14, drawing.Color.dark_magenta)] + # Create a field + text_box_field = ap.forms.TextBoxField(page, rects) + # Setting the appearances of widget annotations + i = 0 + for wa in text_box_field: + wa.default_appearance = default_appearances[i] + i += 1 + text_box_field.value = "Text" + # Add field to the document + document.form.add(text_box_field) + # Save PDF document + document.save(path_outfile) +``` + ## What's new in Aspose.PDF 25.1 An option to save PDF to HTML by skipping all raster images. @@ -144,661 +580,4 @@ The following code snippet demonstrates how to add an annotation text stamp to a stamp.scale = False document.pages[1].add_stamp(stamp) document.save(output_pdf_path) -``` - -## What's new in Aspose.PDF 24.11 - -The following code snippet demonstrates the setting of the hashing algorithm for Pkcs7Detached: - -```python - - import aspose.pdf as ap - import aspose.pydrawing as drawing - - def sign_with_manual_digest_hash_algorithm(cert: str, password: str, input_pdf_path: str, out_pdf_signed_path: str): - document = ap.Document(input_pdf_path) - signature = ap.facades.PdfFileSignature(document) - pkcs = ap.forms.PKCS7Detached(cert, password, ap.DigestHashAlgorithm.SHA512) - signature.sign(1, True, drawing.Rectangle(300, 100, 400, 200), pkcs) - signature.save(out_pdf_signed_path) - document.save(out_pdf_signed_path) -``` - -FontEncodingStrategy property. 
The following sample demonstrates the new option using: - -```python - - import aspose.pdf as ap - - def convert_pdf_to_html_using_cmap(input_pdf_path: str, output_html_path: str): - document = ap.Document(input_pdf_path) - options = ap.HtmlSaveOptions() - options.font_encoding_strategy = ap.HtmlSaveOptions.FontEncodingRules.DECREASE_TO_UNICODE_PRIORITY_LEVEL - document.save(output_html_path, options) -``` - -## What's new in Aspose.PDF 24.10 - -You can use your usual code to sign documents with ECDSA and to verify signatures: - -```python - - import aspose.pdf as ap - import aspose.pydrawing as drawing - - - def sign(cert: str, input_pdf_path: str, signed_pdf_path: str): - document = ap.Document(input_pdf_path) - signature = ap.facades.PdfFileSignature(document) - pkcs = ap.forms.PKCS7Detached(cert, "12345") - signature.sign(1, True, drawing.Rectangle(300, 100, 400, 200), pkcs) - signature.save(signed_pdf_path) - - def verify(signed_pdf_path: str): - document = ap.Document(signed_pdf_path) - signature = ap.facades.PdfFileSignature(document) - sig_names = signature.get_sign_names(True) - for sig_name in sig_names: - is_valid = signature.verify_signature(sig_name) -``` - -Sometimes, it is necessary to crop an image before inserting it into a PDF. 
We have added an overloaded version of the AddImage() method to support adding cropped images: - -```python - - import aspose.pdf as ap - import io - - def add_cropped_image_to_pdf(image_file_path: str, result_pdf_path: str): - document = ap.Document() - img_stream = io.FileIO(image_file_path, "r") - image_rect = ap.Rectangle(17.62, 65.25, 602.62, 767.25, True) - w = image_rect.width / 2 - h = image_rect.height / 2 - bbox = ap.Rectangle(image_rect.llx, image_rect.lly, image_rect.llx + w, image_rect.lly + h, True) - page = document.pages.add() - page.add_image(img_stream, image_rect, bbox, True) - document.save(result_pdf_path) -``` - -## What's new in Aspose.PDF 24.9 - -Now, it is possible to extract PDF document layer elements and save them into new PDF streams. Layers (also known as Optional Content Groups or OCGs) are used in PDF documents for various purposes, primarily to manage and control the visibility of content within the document. This functionality is particularly useful in design, engineering, and publishing. Examples include blueprint aspects, complex diagram components, and language versions of the same content. 
- -```python - - import aspose.pdf as ap - - def extract_pdf_layer(input_pdf_path: str, output_pdf_path: str): - input_document = ap.Document(input_pdf_path) - input_page = input_document.pages[1] - layers = input_page.layers - for layer in layers: - extracted_layer_pdf_name = output_pdf_path + layer.id + ".pdf" - with open(extracted_layer_pdf_name, "wb") as stream: - layer.save(stream) -``` - -The following code snippet demonstrates the graphic comparison of two PDF documents and saves an image with the differences into the resultant PDF document: - -```python - - import aspose.pdf as ap - - def compare_pdf_with_compare_documents_to_pdf_method(first_document_path: str, second_document_path: str, comparison_result_pdf_path: str): - first_document = ap.Document(first_document_path) - second_document = ap.Document(second_document_path) - comparer = ap.comparison.graphicalcomparison.GraphicalPdfComparer() - comparer.threshold = 3.0 - comparer.color = ap.Color.red - comparer.resolution = ap.devices.Resolution(300) - comparer.compare_documents_to_pdf(first_document, second_document, comparison_result_pdf_path) -``` - -To convert HEIC images to PDF user should add the reference to the FileFormat.HEIC NuGet package and use the following code snippet: - -```python - - import aspose.pdf as ap - import io - - dev convert_heic_to_pdf(input_pdf_path: str, result_pdf_path: str, width: int, height: int, pixel_format: ap.BitmapInfo.PixelFormat): - document = ap.Document() - page = document.pages.add() - aspose_image = ap.Image() - - pixels = io.FileIO(input_pdf_path).read() - aspose_image.bitmap_info = ap.BitmapInfo(pixels, width, height, pixel_format) - page.page_info.width = width - page.page_info.height = height - page.page_info.margin.bottom = 0 - page.page_info.margin.top = 0 - page.page_info.margin.right = 0 - page.page_info.margin.left = 0 - page.paragraphs.add(aspose_image) - document.save(result_pdf_path) -``` - -## What's new in Aspose.PDF 24.8 - -The following code 
snippet demonstrates how to convert a document into PDF/A-4 format when the input document is an earlier PDF version than 2.0. - -```python - - import aspose.pdf as ap - - def convert_pdf_to_pdf_a4(input_pdf_path: str, conversion_log_path: str, result_pdf_path: str): - document = ap.Document(input_pdf_path) - document.convert(io.BytesIO(), ap.PdfFormat.V_2_0, ap.ConvertErrorAction.DELETE) - document.convert(conversion_log_path , ap.PdfFormat.PDF_A_4, ap.ConvertErrorAction.DELETE) - document.save(result_pdf_path) -``` - -Since 24.8 we introduced a method for flattening transparent content in PDF documents: - -```python - - import aspose.pdf as ap - - def flatten_transparency(input_pdf_path: str, result_pdf_path: str): - document = ap.Document(input_pdf_path) - document.flatten_transparency() - document.save(result_pdf_path) -``` - -## What's new in Aspose.PDF 24.7 - -The first code snippet demonstrates how to compare the first pages of two PDF documents. - -```python - - import aspose.pdf as ap - - def comparing_specific_pages_side_by_side(input_pdf_path1: str, input_pdf_path2: str, result_pdf_path: str) - document1 = ap.Document(input_pdf_path1) - document2 = ap.Document(input_pdf_path2) - options = ap.comparison.SideBySideComparisonOptions() - options.comparison_mode = ap.comparison.ComparisonMode.IGNORE_SPACES - options.additional_change_marks = True - ap.comparison.SideBySidePdfComparer.compare(document1.pages[1], document2.pages[1], result_pdf_path, options) -``` - -The second code snippet expands the scope to compare the entire content of two PDF documents. 
- -```python - - import aspose.pdf as ap - - def comparing_entire_documents_side_by_side(input_pdf_path1: str, input_pdf_path2: str, result_pdf_path: str): - document1 = ap.Document(input_pdf_path1) - document2 = ap.Document(input_pdf_path2) - options = ap.comparison.SideBySideComparisonOptions() - options.comparison_mode = ap.comparison.ComparisonMode.IGNORE_SPACES - options.additional_change_marks = True - ap.comparison.SideBySidePdfComparer.compare(document1, document2, result_pdf_path, options) -``` - -## What's new in Aspose.PDF 24.6 - -The next code snippet works with a PDF document and its tagged content, utilizing an Aspose.PDF library to process it: - -```python - - import aspose.pdf as ap - - def create_an_accessible_document(input_pdf_path: str, result_pdf_path: str): - document = ap.Document(input_pdf_path) - content = document.tagged_content - span = content.create_span_element() - content.root_element.append_child(span, True) - for op in document.pages[1].contents: - if is_assignable(op, ap.operators.BDC): - bdc = cast(ap.operators.BDC, op) - if bdc is not None: - span.tag(bdc) - document.save(result_pdf_path) -``` - -## What's new in Aspose.PDF 24.5 - -Lock a PDF layer: - -```python - - import aspose.pdf as ap - - def lock_layer_in_pdf(input_pdf_path: str, result_pdf_path: str): - document = ap.Document(input_pdf_path) - page = document.pages[1] - layer = page.layers[0] - layer.lock() - document.save(result_pdf_path) -``` - -Extract PDF layer elements: - -```python - - import aspose.pdf as ap - - def extract_pdf_layer(input_pdf_path: str, result_pdf_path: str): - document = ap.Document(input_pdf_path) - layers = document.pages[1].layers - for layer in layers: - layer.save(result_pdf_path) -``` - -Flatten a layered PDF: - -```python - - import aspose.pdf as ap - - def flatten_pdf_layers(input_pdf_path: str, result_pdf_path: str): - document = ap.Document(input_pdf_path) - page = document.pages[1] - for layer in page.layers: - layer.flatten(True) - 
document.save(result_pdf_path) -``` - -Merge All Layers inside the PDF into one: - -```python - - import aspose.pdf as ap - - def merge_pdf_layers(input_pdf_path: str, result_pdf_path: str): - document = ap.Document(input_pdf_path) - page = document.pages[1] - page.merge_layers("NewLayerName") - # Or page.merge_layers("NewLayerName", "OC1") - document.save(result_pdf_path) -``` - -## What's new in Aspose.PDF 24.4 - -This release supports applying a clipping mask to images: - -```python - - import aspose.pdf as ap - import io - - def add_stencil_masks_to_images(input_pdf_path: str, input_mask1_path: str, input_mask2_path: str, result_pdf_path: str): - document = ap.Document(input_pdf_path) - fs1 = io.FileIO(input_mask1_path, "r") - fs2 = io.FileIO(input_mask2_path, "r") - document.pages[1].resources.images[1].add_stencil_mask(fs1) - document.pages[1].resources.images[2].add_stencil_mask(fs2) - document.save(result_pdf_path) -``` - -Beginning with Aspose.PDF 24.4 this preference can be switched on and off using the Document.PickTrayByPdfSize property or the PdfContentEditor facade: - -```python - - import aspose.pdf as ap - - def pick_tray_by_pdf_size(result_pdf_path: str): - document = ap.Document() - page = document.pages.add() - page.paragraphs.add(ap.text.TextFragment("Hello world!")) - document.pick_tray_by_pdf_size = True - document.save(result_pdf_path) -``` - -```python - - import aspose.pdf as ap - - def pick_tray_by_pdf_size_facade(input_pdf_path: str, result_pdf_path: str): - content_editor = ap.facades.PdfContentEditor() - content_editor.bind_pdf(input_pdf_path) - # Set the flag to choose a paper tray using the PDF page size - content_editor.change_viewer_preference(ap.facades.ViewerPreference.PICK_TRAY_BY_PDF_SIZE) - content_editor.save(result_pdf_path) -``` - -## What's new in Aspose.PDF 24.1 - -Since 24.1 release possible to import FDF format annotations to PDF: - -```python - - import aspose.pdf as ap - - def import_fdf_by_form(input_pdf_path: str, 
template_path: str, result_pdf_path: str): - form = ap.facades.Form(template_path) - fdf_input_stream = io.FileIO(input_pdf_path, 'r') - form.import_fdf(fdf_input_stream) - form.save(result_pdf_path) -``` - -## What's new in Aspose.PDF 23.12 - -From Aspose.PDF 23.12, support was added to the new conversion features: - -- Implement PDF to Markdown conversion - -```python - - import aspose.pdf as ap - - input_pdf_path = DIR_INPUT + "input.pdf" - markdown_output_file_path = DIR_OUTPUT + "output_md_file.md" - - doc = ap.Document(input_pdf_path) - save_options = ap.pdftomarkdown.MarkdownSaveOptions() - save_options.resources_directory_name = "images" - doc.save(markdown_output_file_path, save_options) -``` - -- Implement OFD to PDF conversion - -```python - - import aspose.pdf as ap - - input_path = DIR_INPUT + "input.ofd" - output_path = DIR_OUTPUT + "output.pdf" - document = ap.Document(input_path, ap.OfdLoadOptions()) - document.save(output_path) -``` - -Support for Python 3.6 has been discontinued. - -## What's new in Aspose.PDF 23.11 - -Since 23.11 possible to remove the hidden text. The following code snippet can be used: - -```python - - import aspose.pdf as ap - - document = ap.Document(input_file) - text_absorber = ap.text.TextFragmentAbsorber() - # This option can be used to prevent other text fragments from moving after hidden text replacement. - text_absorber.text_replace_options = ap.text.TextReplaceOptions(ap.text.TextReplaceOptions.ReplaceAdjustment.NONE) - document.pages.accept(text_absorber) - - for fragment in text_absorber.text_fragments: - if fragment.text_state.invisible: - fragment.text = '' - - document.save(output_file) -``` - -## What's new in Aspose.PDF 23.8 - -Since the 23.8 version supports to add Incremental Updates detection. - -The function for detecting Incremental Updates in a PDF document has been added. This function returns 'true' if a document was saved with incremental updates; otherwise, it returns 'false'. 
- -```python - - import aspose.pdf as ap - - doc = ap.Document(file_path) - updated = doc.has_incremental_update() - print(updated) -``` - -Also, 23.8 supports the ways to work with nested checkbox fields. Many fillable PDF forms have checkbox fields that act as radio groups: - -- Create multi-value checkbox field: - -```python - - import aspose.pdf as ap - - document = ap.Document() - page = document.pages.add() - checkbox = ap.forms.CheckboxField(page, ap.Rectangle(50, 50, 70, 70, True)) - # Set the first checkbox group option value - checkbox.export_value = "option 1" - # Add new option right under existing ones - checkbox.add_option("option 2") - # Add new option at the given rectangle - checkbox.add_option("option 3", ap.Rectangle(100, 100, 120, 120, True)) - document.form.add(checkbox) - # Select the added checkbox - checkbox.value = "option 2" - document.save(DIR_OUTPUT + "checkbox_group.pdf") -``` - -- Get and set value of a multi-value checkbox: - -```python - - import aspose.pdf as ap - - doc = ap.Document("example.pdf") - form = doc.form - checkbox = cast(ap.forms.CheckboxField, form.fields[0]) - - # Allowed values may be retrieved from the AllowedStates collection - # Set the checkbox value using Value property - checkbox.value = checkbox.allowed_states[0] - checkbox_value = checkbox.value # the previously set value, e.g. 
"option 1" - # The value should be any element of AllowedStates - checkbox.value = "option 2" - checkbox_value = checkbox.value # option 2 - # Uncheck boxes by either setting Value to "Off" or setting Checked to false - checkbox.value = "Off" - # or, alternately: - # checkbox.checked = False - checkbox_value = checkbox.value # Off -``` - -- Update checkbox state on user click: - -```python - - import aspose.pdf as ap - from aspose.pycore import cast - - input_file = DIR_INPUT + "input.pdf" - document = ap.Document(input_file) - point = ap.Point(62,462) # for example, the coordinates of a mouse click - # Option 1: look through the annotations on the page - page = document.pages[5] - for annotation in page.annotations: - if(annotation.rect.contains(point)): - widget = cast(ap.annotations.WidgetAnnotation, annotation) - checkbox = cast(ap.forms.CheckboxField, widget.parent) - if(annotation.active_state == "Off"): - checkbox.value = widget.get_checked_state_name() - else: - checkbox.value = "Off" - break - # Option 2: look through the fields in the AcroForm - for widget in document.form: - field = cast(ap.forms.Field, widget) - if(field == None): - continue - checkBoxFound = False - for annotation in field: - if(annotation.rect.contains(point)): - checkBoxFound = True - if(annotation.active_state=="Off"): - annotation.parent.value = annotation.get_checked_state_name() - else: - annotation.parent.value = "Off" - if(checkBoxFound): - break -``` - -## What's new in Aspose.PDF 23.7 - -Since the 23.7 version supports to add the shape extraction: - -```python - - import aspose.pdf as ap - - input1_file = DIR_INPUT + "input_1.pdf" - input2_file = DIR_INPUT + "input_2.pdf" - - source = ap.Document(input1_file) - dest = ap.Document(input2_file) - - graphic_absorber = ap.vector.GraphicsAbsorber() - graphic_absorber.visit(source.pages[1]) - area = ap.Rectangle(90, 250, 300, 400, True) - dest.pages[1].add_graphics(graphic_absorber.elements, area) -``` - -Also supports the ability 
to detect Overflow when adding text: - -```python - - import aspose.pdf as ap - - output_file = DIR_OUTPUT + "output.pdf" - doc = ap.Document() - paragraph_content = "Lorem ipsum dolor sit amet, consectetur adipiscing elit. Cras nisl tortor, efficitur sed cursus in, lobortis vitae nulla. Quisque rhoncus, felis sed dictum semper, est tellus finibus augue, ut feugiat enim risus eget tortor. Nulla finibus velit nec ante gravida sollicitudin. Morbi sollicitudin vehicula facilisis. Vestibulum ac convallis erat. Ut eget varius sem. Nam varius pharetra lorem, id ullamcorper justo auctor ac. Integer quis erat vitae lacus mollis volutpat eget et eros. Donec a efficitur dolor. Maecenas non dapibus nisi, ut pellentesque elit. Sed pellentesque rhoncus ante, a consectetur ligula viverra vel. Integer eget bibendum ante. Pellentesque habitant morbi tristique senectus et netus et malesuada fames ac turpis egestas. Curabitur elementum, sem a auctor vulputate, ante libero iaculis dolor, vitae facilisis dolor lorem at orci. Sed laoreet dui id nisi accumsan, id posuere diam accumsan." 
- fragment = ap.text.TextFragment(paragraph_content) - rectangle = ap.Rectangle(100, 600, 500, 700, False) - paragraph = ap.text.TextParagraph() - paragraph.vertical_alignment = ap.VerticalAlignment.TOP - paragraph.formatting_options.wrap_mode = ap.text.TextFormattingOptions.WordWrapMode.BY_WORDS - paragraph.rectangle = rectangle - is_fit_rectangle = fragment.text_state.is_fit_rectangle(paragraph_content, rectangle) - - while is_fit_rectangle == False: - fragment.text_state.font_size -= 0.5 - is_fit_rectangle = fragment.text_state.is_fit_rectangle(paragraph_content, rectangle) - - paragraph.append_line(fragment) - builder = ap.text.TextBuilder(doc.pages.add()) - builder.append_paragraph(paragraph) - doc.save(output_file) -``` - -## What's new in Aspose.PDF 23.6 - -Support the ability to set the title of the HTML, Epub page: - -```python - - import aspose.pdf as ap - - input_pdf = DIR_INPUT + "input.pdf" - output_html = DIR_OUTPUT + "output_title.html" - options = ap.HtmlSaveOptions() - options.fixed_layout = True - options.raster_images_saving_mode = ap.HtmlSaveOptions.RasterImagesSavingModes.AS_EMBEDDED_PARTS_OF_PNG_PAGE_BACKGROUND - options.parts_embedding_mode = ap.HtmlSaveOptions.PartsEmbeddingModes.EMBED_ALL_INTO_HTML - options.title = "NEW PAGE & TITILE" # <-- this added - - document = ap.Document(input_pdf) - document.save(output_html, options) -``` - -## What's new in Aspose.PDF 23.5 - -Since the 23.5 version supports to add RedactionAnnotation FontSize option. 
Use the next code snippet to solve this task: - -```python - - import aspose.pdf as ap - - doc = ap.Document(DIR_INPUT + "input.pdf") - # Create RedactionAnnotation instance for specific page region - annot = ap.annotations.RedactionAnnotation(doc.pages[1], ap.Rectangle(367, 756.919982910156, 420, 823.919982910156, True)) - annot.fill_color = ap.Color.black - annot.border_color = ap.Color.yellow - annot.color = ap.Color.blue - # Text to be printed on redact annotation - annot.overlay_text = "(Unknown)" - annot.text_alignment = ap.HorizontalAlignment.CENTER - # Repat Overlay text over redact Annotation - annot.repeat = False - # New property there ! - annot.font_size = 20 - # Add annotation to annotations collection of first page - doc.pages[1].annotations.add(annot, False) - # Flattens annotation and redacts page contents (i.e. removes text and image - # Under redacted annotation) - annot.redact() - out_file = DIR_OUTPUT + "RedactPage_out.pdf" - doc.save(out_file) -``` - -Support for Python 3.5 has been discontinued. Support for Python 3.11 has been added. - -## What's new in Aspose.PDF 23.3 - -Version 23.3 introduced support for adding a resolution to an image. Two methods can be used to solve this problem: - -```python - - import aspose.pdf as ap - - input_file = DIR_INPUT + "input.jpg" - table = ap.Table() - table.column_widths = "600" - image = ap.Image() - image.is_apply_resolution = True - image.file = input_file - for i in range(0, 2): - row = table.rows.add() - cell = row.cells.add() - cell.paragraphs.add(image) - - page.paragraphs.add(table) -``` - -The image will be placed with scaled resolution or u can set FixedWidth or FixedHeight properties in combination with IsApplyResolution - -## What's new in Aspose.PDF 23.1 - -Since the 23.1 version supports to creation of PrinterMark annotation. 
- -Printer's marks are graphic symbols or text added to a page to assist production personnel in identifying components of a multiple-plate job and maintaining consistent output during production. Examples commonly used in the printing industry include: - -- Registration targets for aligning plates -- Gray ramps and colour bars for measuring colours and ink densities -- Cut marks showing where the output medium is to be trimmed - -We will show the example of the option with color bars for measuring colors and ink densities. There is a basic abstract class PrinterMarkAnnotation and from it child ColorBarAnnotation - which already implements these stripes. Let's check the example: - -```python - - import aspose.pdf as ap - - out_file = DIR_OUTPUT + "ColorBarTest.pdf" - doc = ap.Document() - page = doc.pages.add() - page.trim_box = ap.Rectangle(20, 20, 580, 820, True) - add_annotations(page) - doc.save(out_file) - - -def add_annotations(page: ap.Page): - rect_black = ap.Rectangle(100, 300, 300, 320, True) - rect_cyan = ap.Rectangle(200, 600, 260, 690, True) - rect_magenta = ap.Rectangle(10, 650, 140, 670, True) - color_bar_black = ap.annotations.ColorBarAnnotation(page, rect_black, ap.annotations.ColorsOfCMYK.BLACK) - color_bar_cyan = ap.annotations.ColorBarAnnotation(page, rect_cyan, ap.annotations.ColorsOfCMYK.CYAN) - color_ba_magenta = ap.annotations.ColorBarAnnotation(page, rect_magenta, ap.annotations.ColorsOfCMYK.BLACK) - color_ba_magenta.color_of_cmyk = ap.annotations.ColorsOfCMYK.MAGENTA - color_bar_yellow = ap.annotations.ColorBarAnnotation(page, ap.Rectangle(400, 250, 450, 700, True), ap.annotations.ColorsOfCMYK.YELLOW) - page.annotations.add(color_bar_black, False) - page.annotations.add(color_bar_cyan, False) - page.annotations.add(color_ba_magenta, False) - page.annotations.add(color_bar_yellow, False) -``` - -Also support the vector images extraction. 
Try using the following code to detect and extract vector graphics: - -```python - - import aspose.pdf as ap - - input_pdf = DIR_INPUT + "input.pdf" - output_pdf = DIR_OUTPUT + "output.svg" - doc = ap.Document(input_pdf) - doc.pages[1].try_save_vector_graphics(output_pdf) -``` +``` \ No newline at end of file From b64b348b827410a0e664ed1b73290019365d95f7 Mon Sep 17 00:00:00 2001 From: "Anastasiia.Holub" Date: Fri, 5 Sep 2025 13:25:56 +0300 Subject: [PATCH 21/65] chore: Update FAQ metadata for Aspose.PDF for Python via .NET --- en/python-net/faq/_index.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/en/python-net/faq/_index.md b/en/python-net/faq/_index.md index 08589ce2f..767ee4b5c 100644 --- a/en/python-net/faq/_index.md +++ b/en/python-net/faq/_index.md @@ -9,9 +9,7 @@ lastmod: "2025-09-05" sitemap: changefreq: "monthly" priority: 0.7 -TechArticle: true -AlternativeHeadline: The Frequently Asked Question of Aspose.PDF for Python via .NET library. -Abstract: +TechArticle: false ---

What formats does Aspose.PDF for Python via .NET support?

Aspose.PDF for Python via .NET supports popular file formats such as PDF, TXT, HTML, PCL, XML, XPS, EPUB, TEX, and image formats. For more details, please visit the supported file formats page of the documentation.

Does Aspose.PDF for Python via .NET support AI features?

Yes, the library has built-in OpenAI and Llama API clients. They allow you to make API requests and create AI copilots. You can find examples in the corresponding section of the documentation.

How many files can I combine to PDF at once?

You can merge an unlimited number of files into PDF at once.

How to insert an image into a PDF?

To insert an image into a PDF using Aspose.PDF for Python via .NET, follow the corresponding guide in the documentation.

How to edit the text in a PDF?

To edit the text in a PDF using Aspose.PDF for Python via .NET, follow the corresponding guide in the documentation.

How to add page numbers to a PDF file?

To add page numbers to a PDF using Aspose.PDF for Python via .NET, follow the corresponding guide in the documentation.

How to create a background for PDF Documents?

To create a background for a PDF document using Aspose.PDF for Python via .NET, follow the corresponding guide in the documentation.

How to secure a PDF document?

To secure a PDF document using Aspose.PDF for Python via .NET, follow the corresponding guide in the documentation.

How to add bold text in a highlighted annotation on a PDF page?

To add bold text in a highlighted annotation, follow the corresponding guide in the documentation.

How to validate a tagged PDF?

To validate a tagged PDF document, follow the corresponding guide in the documentation.

How to implement regex search for TextFragmentAbsorber?

To use regex with the `TextFragmentAbsorber` class in Aspose.PDF for Python via .NET, follow the corresponding guide in the documentation.

How to make a valid PDF/A document when a missing font or its substitution is not provided?

To create a valid PDF/A document in this case, follow the corresponding guide in the documentation.

I see errors in PDF/A conversion log. Does it mean that the document wasn't converted successfully?

No, Aspose.PDF logs all problems it encountered, including the ones that were automatically fixed. If all entries in the log are marked as Convertable=True, all problems were fixed, and the document was successfully converted. Only the entries with Convertable=False indicate the conversion failure.

How to make a valid PDF/A document if the conversion log contains "Font not embedded" errors?

If a PDF/A conversion log contains "Font not embedded" error entries marked as Convertable=False, it means that the original document uses fonts that are neither embedded in the document itself nor installed on the machine where the conversion is performed. Check the documentation to learn how to replace missing fonts.

Why doesn't Aspose.PDF convert a document to PDF/A with the ConvertErrorAction.None option?

The ConvertErrorAction.None option prevents Aspose.PDF from removing elements from the document, even if those elements are prohibited by the PDF/A standard. As a result, it may be impossible to convert a document using the ConvertErrorAction.None mode. Use this option when you do not want Aspose.PDF to automatically remove prohibited elements from the document, especially if you plan to review the conversion log and manually fix the document's issues. Another scenario for using this option is when your document is already mostly or fully PDF/A compliant, and you want to avoid unnecessary changes. More permissive PDF/A versions (e.g., PDF/A-2 or PDF/A-3) are more likely to be successfully converted with the ConvertErrorAction.None option. However, for PDF/A-1a and PDF/A-1b standards, it is generally recommended to use the ConvertErrorAction.Delete option.

Does Aspose.PDF for Python via .NET support Linux?

Yes, Aspose.PDF for Python via .NET supports running on Linux environments. You can use the Python via .NET Core version or later, which is cross-platform and can be used on Windows, macOS, and Linux.

Where are your Python via .NET Examples?

You can check all of them in the Aspose.PDF for Python via .NET examples repository.

From 1aa67bfd327a5b22a83e5e9a65df062e498ee9c8 Mon Sep 17 00:00:00 2001 From: "Anastasiia.Holub" Date: Fri, 5 Sep 2025 16:34:12 +0300 Subject: [PATCH 22/65] chore: Update content for adding headers and footers in PDF documentation --- .../_index.md | 116 +++++++++++++++++- 1 file changed, 114 insertions(+), 2 deletions(-) diff --git a/en/python-net/advanced-operations/working-with-pages/adding-headers-and-footers-of-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-pages/adding-headers-and-footers-of-pdf-file/_index.md index f2e5f273b..c9b5b75bb 100644 --- a/en/python-net/advanced-operations/working-with-pages/adding-headers-and-footers-of-pdf-file/_index.md +++ b/en/python-net/advanced-operations/working-with-pages/adding-headers-and-footers-of-pdf-file/_index.md @@ -31,6 +31,9 @@ The following code snippet demonstrates how to add headers and footers as text f import aspose.pdf as ap + path_infile = self.data_dir + infile + path_outfile = self.data_dir + outfile + # Open PDF document with ap.Document(path_infile) as document: for i in range(1, len(document.pages) + 1): @@ -80,17 +83,20 @@ The following code snippet demonstrates how to add headers and footers as HTML f import aspose.pdf as ap + path_infile = self.data_dir + infile + path_outfile = self.data_dir + outfile + # Open PDF document with ap.Document(path_infile) as document: for i in range(1, len(document.pages) + 1): # Create header HTML - header_html = ap.HtmlFragment("header") + header_html = ap.HtmlFragment("This is an HTML Header") # Create header header = ap.HeaderFooter() header.paragraphs.add(header_html) # Create footer HTML - footer_html = ap.HtmlFragment("footer") + footer_html = ap.HtmlFragment("Powered by Aspose.PDF") # Create footer footer = ap.HeaderFooter() @@ -131,6 +137,10 @@ This technique is ideal for branding documents with logos or watermarks in the h import aspose.pdf as ap + path_infile = self.data_dir + infile + path_imagefile = self.data_dir + imagefile + 
path_outfile = self.data_dir + outfile + # Open PDF document with ap.Document(path_infile) as document: for i in range(1, len(document.pages) + 1): @@ -163,4 +173,106 @@ This technique is ideal for branding documents with logos or watermarks in the h # Save PDF document document.save(path_outfile) +``` + +## Adding Headers and Footers as Table + +This code snippet adds headers and footers (using tables) to each page of a PDF document with Aspose.PDF for Python via .NET. + +1. Open the PDF document. +1. Loop through all pages. +1. Create text states for header and footer. +1. Create header and footer objects. +1. Build header table. +1. Build footer table. +1. Add tables to header and footer. +1. Set footer margin. +1. Bind header and footer to the page. +1. Save the updated PDF. + +```python + + import aspose.pdf as ap + + path_infile = self.data_dir + infile + path_outfile = self.data_dir + outfile + + # Open PDF document + with ap.Document(path_infile) as document: + for i in range(1, len(document.pages) + 1): + text_state_top_header = ap.text.TextState() + text_state_top_header.font = ap.text.FontRepository.find_font("Arial") + text_state_top_header.font_size = 12 + text_state_top_header.horizontal_alignment = ap.HorizontalAlignment.CENTER + text_state_info_footer = ap.text.TextState() + text_state_info_footer.font = ap.text.FontRepository.find_font("Arial") + text_state_info_footer.font_size = 12 + text_state_info_footer.horizontal_alignment = ap.HorizontalAlignment.LEFT + # Create header + header = ap.HeaderFooter() + # Create footer + footer = ap.HeaderFooter() + # Create header Table + table_header = ap.Table() + table_header.column_widths = str(594 - header.margin.left - header.margin.right) + table_header.rows.add().cells.add("This is a Table Header", text_state_top_header) + # Create footer Table + table = ap.Table() + table.column_widths = str(594 - footer.margin.left - footer.margin.right) + table.rows.add().cells.add("Powered by Aspose.PDF", 
text_state_info_footer) + header.paragraphs.add(table_header) + footer.paragraphs.add(table) + # Set margin + margin = ap.MarginInfo() + margin.left = 50 + # Set footer margin + footer.margin = margin + # Bind the header and footer to the page + document.pages[i].header = header + document.pages[i].footer = footer + + # Save PDF document + document.save(path_outfile) +``` + +## Adding Headers and Footers as LaTex + +The following code snippet shows how to use LaTeX fragments in headers and footers for a PDF using Aspose.PDF for Python via .NET. + +1. Open the PDF document. +1. Get the number of pages. +1. Iterate over all pages. +1. Create a header with LaTeX text. +1. Create a footer with LaTeX text. +1. Assign header and footer to the page. +1. Save the modified PDF. + +```python + + import aspose.pdf as ap + + path_infile = self.data_dir + infile + path_outfile = self.data_dir + outfile + + # Open PDF document + with ap.Document(path_infile) as document: + page_count = len(document.pages) + for i in range(1, page_count + 1): + # Create header + header = ap.HeaderFooter() + h_latex_text = "This is a LaTex Header. 
\\today\\" + h_l_text = ap.TeXFragment(h_latex_text, True) + # Create footer + footer = ap.HeaderFooter() + f_latex_text = f"\\copyright\\ 2025 My Company -- Page \\thepage\\ is {page_count}" + f_l_text = ap.TeXFragment(f_latex_text, True) + + header.paragraphs.add(h_l_text) + footer.paragraphs.add(f_l_text) + # Bind the header and footer to the page + document.pages[i].header = header + document.pages[i].footer = footer + + # Save PDF document + document.save(path_outfile) ``` \ No newline at end of file From 2e5b5ab397c32e16b3d36a44a08a82e7bdc4939f Mon Sep 17 00:00:00 2001 From: "Anastasiia.Holub" Date: Mon, 8 Sep 2025 11:38:03 +0300 Subject: [PATCH 23/65] chore: Add documentation for working with PDF layers in Python --- .../working-with-documents/_index.md | 2 +- .../working-with-layers/_index.md | 252 ++++++++++++++++++ 2 files changed, 253 insertions(+), 1 deletion(-) create mode 100644 en/python-net/advanced-operations/working-with-documents/working-with-layers/_index.md diff --git a/en/python-net/advanced-operations/working-with-documents/_index.md b/en/python-net/advanced-operations/working-with-documents/_index.md index e19442109..f9d0ec724 100644 --- a/en/python-net/advanced-operations/working-with-documents/_index.md +++ b/en/python-net/advanced-operations/working-with-documents/_index.md @@ -27,4 +27,4 @@ You are able to do the following: - [Merge PDF](/pdf/python-net/merge-pdf-documents/) - merge multiple PDF files into a single PDF document using Python. - [Split PDF](/pdf/python-net/split-document/) - split PDF pages into individual PDF files in your Python applications. - [Working with Headings](/pdf/python-net/working-with-headings/) - you can create numbering in heading your PDF document with Python. - +- [Work with PDF layers](/pdf/python-net/work-with-pdf-layers/) - how to lock a PDF layer, extract PDF layer elements, flatten a layered PDF, and merge all layers inside PDF into one. 
diff --git a/en/python-net/advanced-operations/working-with-documents/working-with-layers/_index.md b/en/python-net/advanced-operations/working-with-documents/working-with-layers/_index.md new file mode 100644 index 000000000..245beeb0c --- /dev/null +++ b/en/python-net/advanced-operations/working-with-documents/working-with-layers/_index.md @@ -0,0 +1,252 @@ +--- +title: Work with PDF layers using Python +linktitle: Work with PDF layers +type: docs +weight: 50 +url: /python-net/work-with-pdf-layers/ +description: The next task explains how to lock a PDF layer, extract PDF layer elements, flatten a layered PDF, and merge all layers inside PDF into one. +lastmod: "2025-09-17" +sitemap: + changefreq: "monthly" + priority: 0.7 +TechArticle: true +AlternativeHeadline: Manipulate the PDF Layers +Abstract: This guide provides a comprehensive overview of how to manage and manipulate PDF layers using the Aspose.PDF for Python via .NET library. PDF layers—also known as Optional Content Groups (OCGs)—enable the organization of content into separate visual components that can be toggled on or off. +--- + +PDF layers are a powerful way to organize and present content flexibly inside a single PDF file, allowing users to show or hide different parts depending on their needs. + +## Add layers to PDF + +This example shows how to create and add multiple layers to a PDF document using Aspose.PDF for Python via .NET. Each layer contains separate graphical content, such as colored lines, which can be turned on or off in PDF viewers that support layers. + +1. Create a new PDF document and add a page. +1. Create and add the red layer. +1. Create and add the green layer. +1. Create and add the blue layer. +1. Save the PDF document. + +The resulting PDF will contain three separate layers: a red line, a green line, and a blue line. Each can be toggled on or off in PDF readers that support layered content. 
+ +```python + + from io import FileIO + import aspose.pdf as ap + from os import path + + path_outfile = path.join(self.dataDir, outfile) + + try: + document = ap.Document() + page = document.pages.add() + + # Red layer + layer = ap.Layer("oc1", "Red Line") + layer.contents.append(ap.operators.SetRGBColorStroke(1, 0, 0)) + layer.contents.append(ap.operators.MoveTo(500, 700)) + layer.contents.append(ap.operators.LineTo(400, 700)) + layer.contents.append(ap.operators.Stroke()) + page.layers.append(layer) + + # Green layer + layer = ap.Layer("oc2", "Green Line") + layer.contents.append(ap.operators.SetRGBColorStroke(0, 1, 0)) + layer.contents.append(ap.operators.MoveTo(500, 750)) + layer.contents.append(ap.operators.LineTo(400, 750)) + layer.contents.append(ap.operators.Stroke()) + page.layers.append(layer) + + # Blue layer + layer = ap.Layer("oc3", "Blue Line") + layer.contents.append(ap.operators.SetRGBColorStroke(0, 0, 1)) + layer.contents.append(ap.operators.MoveTo(500, 800)) + layer.contents.append(ap.operators.LineTo(400, 800)) + layer.contents.append(ap.operators.Stroke()) + page.layers.append(layer) + + document.save(path_outfile) + print(f"\nLayers added successfully to PDF file.\nFile saved at {path_outfile}") + except Exception as e: + print(f"Error adding layers: {e}") +``` + +## Lock a PDF layer + +With Aspose.PDF for Python via .NET you can open a PDF, lock a specific layer on the first page, and save the document with the changes. + +This example shows how to lock a layer (Optional Content Group, OCG) in a PDF document using Aspose.PDF for Python via .NET. Locking prevents users from changing the visibility of the layer in a PDF viewer, ensuring that the content remains always visible (or hidden) as defined by the document. + +1. Open the PDF document. +1. Access the first page of the PDF. +1. Check if the page has layers. +1. Get the first layer and lock it. +1. Save the updated PDF. 
+ +If the PDF contains layers, the first layer will be locked, ensuring its visibility state cannot be changed by the user. If no layers are found, a message is printed instead. + +```python + + from io import FileIO + import aspose.pdf as ap + from os import path + + path_infile = path.join(self.dataDir, infile) + path_outfile = path.join(self.dataDir, outfile) + + try: + document = ap.Document(path_infile) + page = document.pages[1] + + if len(page.layers) > 0: + layer = page.layers[0] + layer.lock() + document.save(path_outfile) + print(f"Layer locked successfully. File saved at {path_outfile}") + else: + print("No layers found in the document.") + except Exception as e: + print(f"Error locking layer: {e}") +``` + +## Extract PDF layer elements + +This example uses the Aspose.PDF for Python via .NET library to extract individual layers from the first page of a PDF document and save each layer as a separate PDF file. + +To create a new PDF from a layer, the following code snippet can be used: + +1. Load the PDF Document. The input PDF is loaded into an Aspose.PDF.Document object. +1. Access Layers on Page 1. The script retrieves all layers from the first page using document.pages[1].layers. +1. Check for Layers. If no layers are found, a message is printed and the function exits. +1. Iterate and Save Each Layer. 
+ +```python + + from io import FileIO + import aspose.pdf as ap + from os import path + + path_infile = path.join(self.dataDir, infile) + path_outfile = path.join(self.dataDir, outfile) + + try: + document = ap.Document(path_infile) + layers = document.pages[1].layers + + if len(layers) == 0: + print("No layers found in the document.") + return + + index = 1 + for layer in layers: + output_file = path_outfile.replace(".pdf", f"{index}.pdf") + layer.save(output_file) + print(f"Layer {index} saved to {output_file}") + index += 1 + except Exception as e: + print(f"Error extracting layers: {e}") +``` + +It is possible to extract PDF layer elements and save them into a new PDF file stream: + +```python + + from io import FileIO + import aspose.pdf as ap + from os import path + + path_infile = path.join(self.dataDir, infile) + path_outfile = path.join(self.dataDir, outfile) + + try: + document = ap.Document(path_infile) + + if len(document.pages[1].layers) == 0: + print("No layers found in the document.") + return + + layer = document.pages[1].layers[0] + + with FileIO(path_outfile, "wb") as output_layer: + layer.save(output_layer) + print(f"Layer extracted to stream: {path_outfile}") + except Exception as e: + print(f"Error extracting layer to stream: {e}") +``` + +## Flatten a layered PDF + +This script uses Aspose.PDF for Python via .NET to flatten all layers on the first page of a PDF document. Flattening merges the visual content of each layer into one unified layer, making it easier to print, share, or archive without losing visual fidelity or layer-specific data. + +1. Load the PDF Document. The input PDF is loaded into an Aspose.PDF.Document object. +1. Access Layers on Page 1. The script retrieves all layers from the first page using document.pages[1].layers. +1. Check for Layer Presence. If no layers are found, a message is printed and the function exits. +1. Flatten Each Layer. 
Each layer is flattened using layer.flatten(True), which merges its content into the page. +1. Save the Modified Document. + +```python + + from io import FileIO + import aspose.pdf as ap + from os import path + + path_infile = path.join(self.dataDir, infile) + path_outfile = path.join(self.dataDir, outfile) + + try: + document = ap.Document(path_infile) + layers = document.pages[1].layers + + if len(layers) == 0: + print("No layers found in the document.") + return + + for layer in layers: + layer.flatten(True) + + document.save(path_outfile) + print(f"Layers flattened successfully. File saved at {path_outfile}") + except Exception as e: + print(f"Error flattening layers: {e}") +``` + +## Merge All Layers inside the PDF into one + +This code snippet uses Aspose.PDF to merge all layers on the first page of a PDF into one unified layer with a custom name. + +1. Load the PDF Document. The PDF is loaded into an Aspose.PDF.Document object. +1. Access Page and Its Layers. The first page is selected, and its layers are retrieved. +1. Check for Layers. If no layers exist, a message is printed and the process exits. +1. Define New Layer Name. A new layer name ("LayerNew") is specified for the merged result. +1. Merge Layers. If an optional content group ID is provided, it's used in the merge. Otherwise, layers are merged using just the new name. +1. 
Save the Document + +```python + + from io import FileIO + import aspose.pdf as ap + from os import path + + path_infile = path.join(self.dataDir, infile) + path_outfile = path.join(self.dataDir, outfile) + + try: + document = ap.Document(path_infile) + page = document.pages[1] + + if len(page.layers) == 0: + print("No layers found in the document.") + return + + new_layer_name = "LayerNew" + new_optional_content_group_id = None + + if new_optional_content_group_id: + page.merge_layers(new_layer_name, new_optional_content_group_id) + else: + page.merge_layers(new_layer_name) + + document.save(path_outfile) + print(f"Layers merged successfully. File saved at {path_outfile}") + except Exception as e: + print(f"Error merging layers: {e}") +``` \ No newline at end of file From d602f8977b3956bc5dee1b54f2dfddacbfc301c6 Mon Sep 17 00:00:00 2001 From: "Anastasiia.Holub" Date: Fri, 12 Sep 2025 10:47:11 +0300 Subject: [PATCH 24/65] Update documentation for converting files to PDF in Python --- .../converting/convert-html-to-pdf/_index.md | 138 ++++++++- .../convert-images-format-to-pdf/_index.md | 107 ++++--- .../convert-other-files-to-pdf/_index.md | 266 +++++++++--------- .../converting/convert-pdf-to-excel/_index.md | 164 ++++++----- 4 files changed, 417 insertions(+), 258 deletions(-) diff --git a/en/python-net/converting/convert-html-to-pdf/_index.md b/en/python-net/converting/convert-html-to-pdf/_index.md index b176da273..b18f7dffd 100644 --- a/en/python-net/converting/convert-html-to-pdf/_index.md +++ b/en/python-net/converting/convert-html-to-pdf/_index.md @@ -4,7 +4,7 @@ linktitle: Convert HTML to PDF file type: docs weight: 40 url: /python-net/convert-html-to-pdf/ -lastmod: "2025-02-27" +lastmod: "2025-09-27" description: Learn how to convert HTML content into a PDF document using Aspose.PDF for Python via .NET sitemap: changefreq: "monthly" @@ -34,22 +34,22 @@ _Format_: **HTML** The following Python code sample shows how to convert an HTML document to a PDF. 1. 
Create an instance of the [HtmlLoadOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/htmlloadoptions/) class. -2. Initialize [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) object. -3. Save output PDF document by calling **Document.Save()** method. +1. Initialize [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) object. +1. Save output PDF document by calling **document.save()** method. ```python - import aspose.pdf as apdf - from io import FileIO from os import path - import pydicom + import aspose.pdf as ap + import requests + import io - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) - load_options = apdf.HtmlLoadOptions() - - document = apdf.Document(path_infile, load_options) + load_options = ap.HtmlLoadOptions() + load_options.page_layout_option = ap.HtmlPageLayoutOption.SCALE_TO_PAGE_WIDTH + document = ap.Document(path_infile, load_options) document.save(path_outfile) print(infile + " converted into " + outfile) ``` @@ -62,6 +62,122 @@ Aspose presents you online free application ["HTML to PDF"](https://products.asp [![Aspose.PDF Convertion HTML to PDF using Free App](html.png)](https://products.aspose.app/html/en/conversion/html-to-pdf) {{% /alert %}} +## Convert HTML to PDF media type + +1. Create an instance of the [HtmlLoadOptions()](https://reference.aspose.com/pdf/python-net/aspose.pdf/htmlloadoptions/) class. 'html_media_type' applies CSS rules intended for on-screen display. +1. Load and convert HTML. +1. Save output PDF document by calling **document.save()** method. 
+ +```python + + from os import path + import aspose.pdf as ap + import requests + import io + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + load_options = ap.HtmlLoadOptions() + load_options.html_media_type = ap.HtmlMediaType.SCREEN + document = ap.Document(path_infile, load_options) + document.save(path_outfile) + print(infile + " converted into " + outfile) +``` + +## Convert HTML to PDF priority CSS Page Rule + +1. Create an instance of the [HtmlLoadOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/htmlloadoptions/) class. +1. Optionally sets how CSS is applied. The commented line 'is_priority_css_page_rule' can override how CSS rules affect page layout. +1. Converts and saves it as a PDF. + +```python + from os import path + import aspose.pdf as ap + import requests + import io + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + load_options = ap.HtmlLoadOptions() + # load_options.is_priority_css_page_rule = False + document = ap.Document(path_infile, load_options) + document.save(path_outfile) + print(infile + " converted into " + outfile) +``` + +## Convert HTML to PDF Embed Fonts + +1. Loads an HTML file. +1. Embeds fonts into the resulting PDF to preserve appearance across devices. +1. Saves the PDF to a specified output location. + +```python + + from os import path + import aspose.pdf as ap + import requests + import io + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + load_options = ap.HtmlLoadOptions() + load_options.is_embed_fonts = True + document = ap.Document(path_infile, load_options) + document.save(path_outfile) + print(infile + " converted into " + outfile) +``` + +## Convert HTML to PDF Render Content to same Page + +Use Aspose.PDF for Python via .NET to convert an HTML file into a single-page PDF. + +1. Loads an HTML file. +1. 
Configures Aspose to render everything on a single PDF page. +1. Saves the PDF to a specified location. + +```python + + from os import path + import aspose.pdf as ap + import requests + import io + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + options = ap.HtmlLoadOptions() + options.is_render_to_single_page = True + + doc = ap.Document(path_infile, options) + doc.save(path_outfile) +``` + +## Convert MHTML to PDF + +Loads an MHT (MHTML) file from disk and converts it into a PDF using Aspose.PDF for Python via .NET. Allows specifying custom page dimensions to ensure a consistent layout. With this operation, you can support embedded HTML, CSS, and images in MHT files, as well as configurable page width and height, and save the converted document to a specified output folder. + +1. Loads an MHT (MHTML) file. +1. Sets the page size for the converted document. +1. Converts the content into PDF (or another format) using Aspose.PDF. +1. Saves the file to a specific folder and prints a confirmation. 
+ +```python + + from os import path + import aspose.pdf as ap + import requests + import io + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + load_options = ap.MhtLoadOptions() + load_options.page_info.width = 842 + load_options.page_info.height= 1191 + document = ap.Document(path_infile, load_options) + document.save(path_outfile) + print(infile + " converted into " + outfile) +``` \ No newline at end of file diff --git a/en/python-net/converting/convert-images-format-to-pdf/_index.md b/en/python-net/converting/convert-images-format-to-pdf/_index.md index 43bf507ec..831a73d6e 100644 --- a/en/python-net/converting/convert-images-format-to-pdf/_index.md +++ b/en/python-net/converting/convert-images-format-to-pdf/_index.md @@ -4,7 +4,7 @@ linktitle: Convert Images to PDF type: docs weight: 60 url: /python-net/convert-images-format-to-pdf/ -lastmod: "2025-03-01" +lastmod: "2025-09-01" description: Convert various images formats such as, BMP, CGM, DICOM, PNG, TIFF, EMF and SVG to PDF using Python. sitemap: changefreq: "monthly" @@ -72,28 +72,31 @@ You can convert BMP to PDF files with Aspose.PDF for Python via .NET API. Theref Steps to Convert BMP to PDF in Python: -1. Create a new PDF Document -1. Define Image Placement -1. Insert the Image into the PDF -1. Save the PDF file +1. Creates an empty PDF document. +1. Adds a new A4-sized page. +1. Places the image (from infile) inside the page using the defined rectangle. +1. Saves the document as PDF. 
So the following code snippet follows these steps and shows how to convert BMP to PDF using Python: ```python - import aspose.pdf as apdf from io import FileIO from os import path - import pydicom + import os + import shutil + import aspose.pdf as apdf + import inspect - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) document = apdf.Document() - page = document.pages.add() rectangle = apdf.Rectangle(0, 0, 595, 842, True) # A4 size in points page.add_image(path_infile, rectangle) document.save(path_outfile) + + print(infile + " converted into " + outfile) ``` {{% alert color="success" %}} @@ -106,6 +109,8 @@ Aspose presents you online free application ["BMP to PDF"](https://products.aspo ## Convert CGM to PDF +Converts a CGM (Computer Graphics Metafile) into PDF (or another supported format) using Aspose.PDF for Python via .NET. + CGM is a file extension for a Computer Graphics Metafile format commonly used in CAD (computer-aided design) and presentation graphics applications. CGM is a vector graphics format that supports three different encoding methods: binary (best for program read speed), character-based (produces the smallest file size and allows for faster data transfers) or cleartext encoding (allows users to read and modify the file with a text editor). Check next code snippet for converting CGM files to PDF format. 
@@ -119,14 +124,15 @@ Steps to Convert CGM to PDF in Python: ```python - import aspose.pdf as apdf from io import FileIO from os import path - import pydicom + import os + import shutil + import aspose.pdf as apdf + import inspect - # The path to the documents directory - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) options = apdf.CgmLoadOptions() @@ -144,15 +150,29 @@ Steps to Convert CGM to PDF in Python: The following code snippet shows how to convert DICOM files to PDF format with Aspose.PDF. You should load DICOM image, place the image on a page in a PDF file and save the output as PDF. +1. Load the DICOM file. +1. Extract image dimensions. +1. Print image size. +1. Create a new PDF document. +1. Prepare the DICOM image for PDF. +1. Set PDF page size and margins. +1. Add the image to the page. +1. Save the PDF. +1. Print Conversion Message. 
+ ```python - import aspose.pdf as apdf from io import FileIO from os import path + import os + import shutil + import aspose.pdf as apdf + import inspect import pydicom - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) # Load the DICOM file dicom_file = pydicom.dcmread(path_infile) @@ -201,13 +221,16 @@ The following code snippet shows how to convert an EMF to PDF with Python: ```python - import aspose.pdf as apdf from io import FileIO from os import path + import os + import shutil + import aspose.pdf as apdf + import inspect import pydicom - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) document = apdf.Document() page = document.pages.add() @@ -238,13 +261,16 @@ So the following code snippet follows these steps and shows how to convert BMP t ```python - import aspose.pdf as apdf from io import FileIO from os import path + import os + import shutil + import aspose.pdf as apdf + import inspect import pydicom - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) document = apdf.Document() page = document.pages.add() @@ -280,13 +306,16 @@ Moreover, the code snippet below shows how to convert PNG to PDF with Python: ```python - import aspose.pdf as apdf from io import FileIO from os import path + import os + import shutil + import aspose.pdf as apdf + import inspect import pydicom - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = 
path.join(self.data_dir, "python", outfile) document = apdf.Document() page = document.pages.add() @@ -325,13 +354,16 @@ The following code snippet shows the process of converting SVG file into PDF for ```python - import aspose.pdf as apdf from io import FileIO from os import path + import os + import shutil + import aspose.pdf as apdf + import inspect import pydicom - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) load_options = apdf.SvgLoadOptions() document = apdf.Document(path_infile, load_options) @@ -350,13 +382,16 @@ You can convert TIFF to PDF in the same manner as the rest raster file formats g ```python - import aspose.pdf as apdf from io import FileIO from os import path + import os + import shutil + import aspose.pdf as apdf + import inspect import pydicom - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) document = apdf.Document() page = document.pages.add() @@ -367,6 +402,12 @@ You can convert TIFF to PDF in the same manner as the rest raster file formats g print(infile + " converted into " + outfile) ``` +## Convert CDR to PDF + + +## Convert JPEG to PDF + + ## See Also This article also covers these topics. The codes are same as above. 
diff --git a/en/python-net/converting/convert-other-files-to-pdf/_index.md b/en/python-net/converting/convert-other-files-to-pdf/_index.md index b1625533f..d44bb5e5b 100644 --- a/en/python-net/converting/convert-other-files-to-pdf/_index.md +++ b/en/python-net/converting/convert-other-files-to-pdf/_index.md @@ -4,7 +4,7 @@ linktitle: Convert other file formats to PDF type: docs weight: 80 url: /python-net/convert-other-files-to-pdf/ -lastmod: "2025-03-01" +lastmod: "2025-09-01" description: This topic show you how to Aspose.PDF allows to convert other file formats such as EPUB, MD, PCL, XPS, PS, XML and LaTeX to PDF document. sitemap: changefreq: "monthly" @@ -18,6 +18,11 @@ Abstract: This article provides a comprehensive guide on converting various file This article explains how to **convert various other types of file formats to PDF using Python**. It covers the following topics. +_Format_: **OFD** +- [Python OFD to PDF](#python-convert-ofd-to-pdf) +- [Python Convert OFD to PDF](#python-convert-ofd-to-pdf) +- [Python How to convert OFD file to PDF](#python-convert-ofd-to-pdf) + _Format_: **EPUB** - [Python EPUB to PDF](#python-convert-epub-to-pdf) - [Python Convert EPUB to PDF](#python-convert-epub-to-pdf) @@ -53,6 +58,64 @@ _Format_: **XPS** - [Python Convert XPS to PDF](#python-convert-xps-to-pdf) - [Python How to convert XPS file to PDF](#python-convert-xps-to-pdf) +## Convert OFD to PDF + +OFD stands for Open Fixed-layout Document (sometimes called Open Fixed Document format). It is a Chinese national standard (GB/T 33190-2016) for electronic documents, introduced as an alternative to PDF. + +Steps Convert OFD to PDF in Python: + +1. Set up OFD load options using OfdLoadOptions(). +1. Load the OFD document. +1. Save as PDF. 
+ +```python + + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + load_options = ap.OfdLoadOptions() + document = ap.Document(path_infile, load_options) + document.save(path_outfile) + + print(infile + " converted into " + outfile) +``` + +## Convert LaTeX/TeX to PDF + +The LaTeX file format is a text file format with markup in the LaTeX derivative of the TeX family of languages and LaTeX is a derived format of the TeX system. LaTeX (ˈleɪtɛk/lay-tek or lah-tek) is a document preparation system and document markup language. It is widely used for the communication and publication of scientific documents in many fields, including mathematics, physics, and computer science. It also has a prominent role in the preparation and publication of books and articles that contain complex multilingual materials, such as Sanskrit and Arabic, including critical editions. LaTeX uses the TeX typesetting program for formatting its output, and is itself written in the TeX macro language. + +{{% alert color="success" %}} +**Try to convert LaTeX/TeX to PDF online** + +Aspose.PDF for Python via .NET presents you online free application ["LaTex to PDF"](https://products.aspose.app/pdf/conversion/tex-to-pdf), where you may try to investigate the functionality and quality it works. + +[![Aspose.PDF Convertion LaTeX/TeX to PDF with Free App](latex.png)](https://products.aspose.app/pdf/conversion/tex-to-pdf) +{{% /alert %}} + +Steps Convert TEX to PDF in Python: + +1. Set up LaTeX load options using LatexLoadOptions(). +1. Load the LaTeX document. +1. Save as PDF. 
+ +```python + + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + load_options = ap.LatexLoadOptions() + document = ap.Document(path_infile, load_options) + document.save(path_outfile) + + print(infile + " converted into " + outfile) +``` + ## Convert EPUB to PDF **Aspose.PDF for Python via .NET** allows you simply convert EPUB files to PDF format. @@ -71,24 +134,22 @@ Aspose.PDF for Python via .NET presents you online free application ["EPUB to PD Steps Convert EPUB to PDF in Python: -1. Load EPUB Document -1. Convert EPUB to PDF -1. Print Confirmation +1. Load EPUB Document with EpubLoadOptions(). +1. Convert EPUB to PDF. +1. Print Confirmation. Next following code snippet show you how to convert EPUB files to PDF format with Python. ```python - import aspose.pdf as apdf - from io import FileIO from os import path - import pydicom + import aspose.pdf as ap - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) - load_options = apdf.EpubLoadOptions() - document = apdf.Document(path_infile, load_options) + load_options = ap.EpubLoadOptions() + document = ap.Document(path_infile, load_options) document.save(path_outfile) print(infile + " converted into " + outfile) @@ -112,16 +173,14 @@ The following code snippet shows how to use this functionality with Aspose.PDF l ```python - import aspose.pdf as apdf - from io import FileIO from os import path - import pydicom + import aspose.pdf as ap - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) - load_options = apdf.MdLoadOptions() - document = apdf.Document(path_infile, load_options) + 
load_options = ap.MdLoadOptions() + document = ap.Document(path_infile, load_options) document.save(path_outfile) print(infile + " converted into " + outfile) ``` @@ -142,20 +201,24 @@ To allow conversion from PCL to PDF, Aspose.PDF has the class [`PclLoadOptions`] The following code snippet shows the process of converting a PCL file into PDF format. +Steps Convert PCL to PDF in Python: + +1. Load options for PCL using PclLoadOptions(). +1. Load the document. +1. Save as PDF. + ```python - import aspose.pdf as apdf - from io import FileIO from os import path - import pydicom + import aspose.pdf as ap - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) - load_options = apdf.PclLoadOptions() + load_options = ap.PclLoadOptions() load_options.supress_errors = True - document = apdf.Document(path_infile, load_options) + document = ap.Document(path_infile, load_options) document.save(path_outfile) print(infile + " converted into " + outfile) @@ -175,40 +238,51 @@ Aspose.PDF for Python via .NET presents you online free application ["Text to PD [![Aspose.PDF Convertion TEXT to PDF with Free App](text_to_pdf.png)](https://products.aspose.app/pdf/conversion/txt-to-pdf) {{% /alert %}} +Steps Convert TEXT to PDF in Python: + +1. Read the text file. +1. Set up font. +1. Create PDF and first page. +1. Page formatting. +1. Loop through lines. +1. Save PDF. 
+ ```python - import aspose.pdf as apdf - from io import FileIO from os import path - import pydicom + import aspose.pdf as ap - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) with open(path_infile, "r") as file: lines = file.readlines() - monospace_font = apdf.text.FontRepository.find_font("Courier New") + monospace_font = ap.text.FontRepository.find_font("Courier New") - document = apdf.Document() + document = ap.Document() page = document.pages.add() page.page_info.margin.left = 20 page.page_info.margin.right = 10 page.page_info.default_text_state.font = monospace_font page.page_info.default_text_state.font_size = 12 - + count = 1 for line in lines: if line != "" and line[0] == "\x0c": - page = document.Pages.Add() + page = document.pages.add() page.page_info.margin.left = 20 page.page_info.margin.right = 10 - page.page_info.defaultTextState.Font = monospace_font - page.page_info.defaulttextstate.FontSize = 12 + page.page_info.default_text_state.font = monospace_font + page.page_info.default_text_state.font_size = 12 + count = count + 1 else: - text = apdf.text.TextFragment(line) + text = ap.text.TextFragment(line) page.paragraphs.add(text) + if count == 4: + break + document.save(path_outfile) print(infile + " converted into " + outfile) @@ -224,16 +298,14 @@ The following code snippet shows the process of converting XPS file into PDF for ```python - import aspose.pdf as apdf - from io import FileIO from os import path - import pydicom + import aspose.pdf as ap - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) - load_options = apdf.XpsLoadOptions() - document = apdf.Document(path_infile, load_options) + load_options = 
ap.XpsLoadOptions() + document = ap.Document(path_infile, load_options) document.save(path_outfile) print(infile + " converted into " + outfile) @@ -242,116 +314,30 @@ The following code snippet shows the process of converting XPS file into PDF for {{% alert color="success" %}} **Try to convert XPS format to PDF online** -Aspose.PDF for Python via .NET presents you online free application ["XPS to PDF"](https://products.aspose.app/pdf/conversion/xps-to-pdf/), where you may try to investigate the functionality and quality it works. +Aspose.PDF for Python via .NET presents you online free application ["XPS to PDF"](https://products.aspose.app/pdf/conversion/xps-to-pdf/), where you may try to investigate the functionality and quality it works. [![Aspose.PDF Convertion XPS to PDF with Free App](xps_to_pdf.png)](https://products.aspose.app/pdf/conversion/xps-to-pdf/) {{% /alert %}} -## Convert PostScript to PDF - -**Aspose.PDF for Python via .NET** support features converting PostScript files to PDF format. One of the features from Aspose.PDF is that you can set a set of font folders to be used during conversion. - -Following code snippet can be used to convert a PostScript file into PDF format with Aspose.PDF for Python via .NET: - -```python - - import aspose.pdf as apdf - from io import FileIO - from os import path - import pydicom - - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) - - load_options = apdf.PsLoadOptions() - document = apdf.Document(path_infile, load_options) - document.save(path_outfile) - - print(infile + " converted into " + outfile) -``` - -## Convert XML to PDF - -The XML format used to store structured data. 
There are several ways to convert XML to PDF in Aspose.PDF: - -{{% alert color="success" %}} -**Try to convert XML to PDF online** - -Aspose.PDF for Python via .NET presents you online free application ["XML to PDF"](https://products.aspose.app/pdf/conversion/xml-to-pdf), where you may try to investigate the functionality and quality it works. - -[![Aspose.PDF Convertion XML to PDF with Free App](xml_to_pdf.png)](https://products.aspose.app/pdf/conversion/xml-to-pdf) -{{% /alert %}} - -Following code snippet can be used to convert a XML to PDF format with Aspose.PDF for Python: - -```python - - import aspose.pdf as apdf - from io import FileIO - from os import path - import pydicom - - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) - - load_options = apdf.XmlLoadOptions("template.xslt") - document = apdf.Document(path_infile, load_options) - document.save(path_outfile) - - print(infile + " converted into " + outfile) -``` - ## Convert XSL-FO to PDF Following code snippet can be used to convert a XSLFO to PDF format with Aspose.PDF for Python via .NET: ```python - import aspose.pdf as apdf - from io import FileIO from os import path - import pydicom + import aspose.pdf as ap - path_xsltfile = path.join(self.dataDir, xsltfile) - path_xmlfile = path.join(self.dataDir, xmlfile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_xsltfile = path.join(self.data_dir, xsltfile) + path_xmlfile = path.join(self.data_dir, xmlfile) + path_outfile = path.join(self.data_dir, "python", outfile) - load_options = apdf.XslFoLoadOptions(path_xsltfile) - load_options.parsing_errors_handling_type = apdf.XslFoLoadOptions.ParsingErrorsHandlingTypes.ThrowExceptionImmediately - document = apdf.Document(path_xmlfile, load_options) + load_options = ap.XslFoLoadOptions(path_xsltfile) + load_options.parsing_errors_handling_type = ( + ap.XslFoLoadOptions.ParsingErrorsHandlingTypes.ThrowExceptionImmediately + ) + document 
= ap.Document(path_xmlfile, load_options) document.save(path_outfile) print(xmlfile + " converted into " + outfile) - ``` - -## Convert LaTeX/TeX to PDF - -The LaTeX file format is a text file format with markup in the LaTeX derivative of the TeX family of languages and LaTeX is a derived format of the TeX system. LaTeX (ˈleɪtɛk/lay-tek or lah-tek) is a document preparation system and document markup language. It is widely used for the communication and publication of scientific documents in many fields, including mathematics, physics, and computer science. It also has a prominent role in the preparation and publication of books and articles that contain complex multilingual materials, such as Sanskrit and Arabic, including critical editions. LaTeX uses the TeX typesetting program for formatting its output, and is itself written in the TeX macro language. - -{{% alert color="success" %}} -**Try to convert LaTeX/TeX to PDF online** - -Aspose.PDF for Python via .NET presents you online free application ["LaTex to PDF"](https://products.aspose.app/pdf/conversion/tex-to-pdf), where you may try to investigate the functionality and quality it works. - -[![Aspose.PDF Convertion LaTeX/TeX to PDF with Free App](latex.png)](https://products.aspose.app/pdf/conversion/tex-to-pdf) -{{% /alert %}} - -The following code snippet shows the process of converting LaTex file to PDF format with Python. 
- -```python - - import aspose.pdf as apdf - from io import FileIO - from os import path - import pydicom - - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) - - load_options = apdf.LatexLoadOptions() - document = apdf.Document(path_infile, load_options) - document.save(path_outfile) - - print(infile + " converted into " + outfile) -``` \ No newline at end of file diff --git a/en/python-net/converting/convert-pdf-to-excel/_index.md b/en/python-net/converting/convert-pdf-to-excel/_index.md index 277ae11c1..6432a3c42 100644 --- a/en/python-net/converting/convert-pdf-to-excel/_index.md +++ b/en/python-net/converting/convert-pdf-to-excel/_index.md @@ -4,7 +4,7 @@ linktitle: Convert PDF to Excel type: docs weight: 20 url: /python-net/convert-pdf-to-excel/ -lastmod: "2025-02-27" +lastmod: "2025-09-27" description: Convert PDFs to Excel spreadsheets effortlessly with Aspose.PDF for Python via .NET. Follow this guide for accurate PDF to XLSX conversions sitemap: changefreq: "monthly" @@ -64,21 +64,19 @@ Aspose.PDF presents you online free application ["PDF to XLSX"](https://products The following code snippet shows the process for converting PDF file into XLS or XLSX format with Aspose.PDF for Python via .NET. -Steps: Convert PDF to XLS in Python +Steps: Convert a PDF file to an Excel (XML Spreadsheet 2003) format -1. Create an instance of [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) object with the source PDF document. -2. Create an instance of [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/). -3. Save it to **XLS** format specifying **.xls extension** by calling [Document.Save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method and passing it [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/). +1. Load the PDF document. +1. 
Set up Excel save options using [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/). +1. Save the converted file. ```python - import aspose.pdf as apdf - from io import FileIO from os import path - import pydicom + import aspose.pdf as apdf - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) document = apdf.Document(path_infile) save_options = apdf.ExcelSaveOptions() @@ -88,104 +86,123 @@ The following code snippet shows the process for converting PDF file into XLS or print(infile + " converted into " + outfile) ``` +Steps: Convert a PDF file to an XLSX format (Excel 2007+) -Steps: Convert PDF to XLSX in Python - -1. Create an instance of [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) object with the source PDF document. -2. Create an instance of [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/). -3. Save it to **XLSX** format specifying **.xlsx extension** by calling [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method and passing it [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/). +1. Load the PDF document. +1. Set up Excel save options using [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/). +1. Save the converted file. 
```python - import aspose.pdf as ap + from os import path + import aspose.pdf as apdf - input_pdf = DIR_INPUT + "sample.pdf" - output_pdf = DIR_OUTPUT + "convert_pdf_to_xlsx.xlsx" - # Open PDF document - document = ap.Document(input_pdf) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) - save_option = ap.ExcelSaveOptions() + document = apdf.Document(path_infile) + save_options = apdf.ExcelSaveOptions() + save_options.format = apdf.ExcelSaveOptions.ExcelFormat.XLSX + document.save(path_outfile, save_options) - # Save the file into MS Excel format - document.save(output_pdf, save_option) + print(infile + " converted into " + outfile) ``` ## Convert PDF to XLS with control Column -When converting a PDF to XLS format, a blank column is added to the output file as first column. The in 'ExcelSaveOptions class' InsertBlankColumnAtFirst option is used to control this column. Its default value is true. +When converting a PDF to XLS format, a blank column is added to the output file as the first column. The 'insert_blank_column_at_first' option of the 'ExcelSaveOptions' class is used to control this column. Its default value is true. 
```python - import aspose.pdf as ap + from os import path + import aspose.pdf as apdf - input_pdf = DIR_INPUT + "sample.pdf" - output_pdf = DIR_OUTPUT + "convert_pdf_to_xlsx_with_control_column.xls" - # Open PDF document - document = ap.Document(input_pdf) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) - save_option = ap.ExcelSaveOptions() - save_option.format = ap.ExcelSaveOptions.ExcelFormat.XML_SPREAD_SHEET2003 - save_option.insert_blank_column_at_first = True + document = apdf.Document(path_infile) + save_options = apdf.ExcelSaveOptions() + save_options.format = apdf.ExcelSaveOptions.ExcelFormat.XLSX + save_options.insert_blank_column_at_first = True + document.save(path_outfile, save_options) - # Save the file into MS Excel format - document.save(output_pdf, save_option) + print(infile + " converted into " + outfile) ``` ## Convert PDF to Single Excel Worksheet -When exporting a PDF file with a lot of pages to XLS, each page is exported to a different sheet in the Excel file. This is because the MinimizeTheNumberOfWorksheets property is set to false by default. To ensure that all pages are exported to one single sheet in the output Excel file, set the MinimizeTheNumberOfWorksheets property to true. +Aspose.PDF for Python via .NET shows how to convert a PDF to an Excel (.xlsx) file, with the 'minimize_the_number_of_worksheets' option enabled. Steps: Convert PDF to XLS or XLSX Single Worksheet in Python -1. Create an instance of [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) object with the source PDF document. -2. Create an instance of [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/) with **MinimizeTheNumberOfWorksheets = true**. -3. 
Save it to **XLS** or **XLSX** format having single worksheet by calling [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method and passing it [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/). +1. Load the PDF document. +1. Set up Excel save options using [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/). +1. The 'minimize_the_number_of_worksheets' option reduces the number of Excel sheets by combining PDF pages into fewer worksheets (e.g., one worksheet for the entire document if possible). +1. Save the converted file. ```python - import aspose.pdf as ap + from os import path + import aspose.pdf as apdf + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + document = apdf.Document(path_infile) + save_options = apdf.ExcelSaveOptions() + save_options.format = apdf.ExcelSaveOptions.ExcelFormat.XLSX + save_options.minimize_the_number_of_worksheets = True + document.save(path_outfile, save_options) + + print(infile + " converted into " + outfile) +``` - input_pdf = DIR_INPUT + "many_pages.pdf" - output_pdf = DIR_OUTPUT + "convert_pdf_to_xlsx_single_excel_worksheet.xls" - # Open PDF document - document = ap.Document(input_pdf) +## Convert PDF file into an Excel file in XLSM format - save_option = ap.ExcelSaveOptions() - save_option.format = ap.ExcelSaveOptions.ExcelFormat.XML_SPREAD_SHEET2003 - save_option.minimize_the_number_of_worksheets = True +This Python example shows how to convert a PDF file into an Excel file in XLSM format (Excel Macro-Enabled Workbook). 
+ +```python + + from os import path + import aspose.pdf as apdf - # Save the file into MS Excel format - document.save(output_pdf, save_option) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + document = apdf.Document(path_infile) + save_options = apdf.ExcelSaveOptions() + save_options.format = apdf.ExcelSaveOptions.ExcelFormat.XLSM + document.save(path_outfile, save_options) + + print(infile + " converted into " + outfile) ``` ## Convert to other spreadsheet formats ### Convert to CSV -Conversion to CSV format performs in the same way as above. All is what you need - set the appropriate format. +The 'convert_pdf_to_excel_2007_csv' function performs the same operation as before, but this time the target format is CSV (Comma-Separated Values) instead of XLSM. Steps: Convert PDF to CSV in Python 1. Create an instance of [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) object with the source PDF document. -2. Create an instance of [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/) with **Format = ExcelSaveOptions.ExcelFormat.CSV** -3. Save it to **CSV** format by calling [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods)* method and passing it [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/). - +1. Create an instance of [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/) with **ExcelSaveOptions.ExcelFormat.CSV** +1. Save it to **CSV** format by calling [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods)* method and passing it [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/). 
```python - import aspose.pdf as ap + from os import path + import aspose.pdf as apdf - input_pdf = DIR_INPUT + "sample.pdf" - output_pdf = DIR_OUTPUT + "convert_pdf_to_csv.csv" - # Open PDF document - document = ap.Document(input_pdf) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) - save_option = ap.ExcelSaveOptions() - save_option.format = ap.ExcelSaveOptions.ExcelFormat.CSV + document = apdf.Document(path_infile) + save_options = apdf.ExcelSaveOptions() + save_options.format = apdf.ExcelSaveOptions.ExcelFormat.CSV + document.save(path_outfile, save_options) - # Save the file - document.save(output_pdf, save_option) + print(infile + " converted into " + outfile) ``` ### Convert to ODS @@ -193,29 +210,28 @@ Conversion to CSV format performs in the same way as above. All is what you need Steps: Convert PDF to ODS in Python 1. Create an instance of [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) object with the source PDF document. -2. Create an instance of [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/) with **Format = ExcelSaveOptions.ExcelFormat.ODS** -3. Save it to **ODS** format by calling [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method and passing it [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/). - +1. Create an instance of [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/) with **ExcelSaveOptions.ExcelFormat.ODS** +1. Save it to **ODS** format by calling [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method and passing it [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/). Conversion to ODS format performs in the same way as all other formats. 
```python - import aspose.pdf as ap + from os import path + import aspose.pdf as apdf - input_pdf = DIR_INPUT + "sample.pdf" - output_pdf = DIR_OUTPUT + "convert_pdf_to_ods.ods" - # Open PDF document - document = ap.Document(input_pdf) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) - save_option = ap.ExcelSaveOptions() - save_option.format = ap.ExcelSaveOptions.ExcelFormat.ODS + document = apdf.Document(path_infile) + save_options = apdf.ExcelSaveOptions() + save_options.format = apdf.ExcelSaveOptions.ExcelFormat.ODS + document.save(path_outfile, save_options) - # Save the file - document.save(output_pdf, save_option) + print(infile + " converted into " + outfile) ``` -## See Also +## See Also This article also covers these topics. The codes are same as above. From 4d1da0f9952ac8840de103e41e8557f07c8bfcc3 Mon Sep 17 00:00:00 2001 From: "Anastasiia.Holub" Date: Fri, 12 Sep 2025 11:04:53 +0300 Subject: [PATCH 25/65] chore: Add code examples for converting CDR and JPEG images to PDF in Python --- .../convert-images-format-to-pdf/_index.md | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/en/python-net/converting/convert-images-format-to-pdf/_index.md b/en/python-net/converting/convert-images-format-to-pdf/_index.md index 831a73d6e..9671e1a13 100644 --- a/en/python-net/converting/convert-images-format-to-pdf/_index.md +++ b/en/python-net/converting/convert-images-format-to-pdf/_index.md @@ -404,10 +404,51 @@ You can convert TIFF to PDF in the same manner as the rest raster file formats g ## Convert CDR to PDF +```python + + from io import FileIO + from os import path + import os + import shutil + import aspose.pdf as apdf + import inspect + import pydicom + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + load_options = apdf.CdrLoadOptions() + document = apdf.Document(path_infile, load_options) + 
document.save(path_outfile) + + print(infile + " converted into " + outfile) +``` ## Convert JPEG to PDF +```python + + from io import FileIO + from os import path + import os + import shutil + import aspose.pdf as apdf + import inspect + import pydicom + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + document = apdf.Document() + page = document.pages.add() + rectangle = apdf.Rectangle(0, 0, 595, 842, True) # A4 size in points + page.add_image(path_infile, rectangle) + + document.save(path_outfile) + print(infile + " converted into " + outfile) +``` + ## See Also This article also covers these topics. The codes are same as above. From 75581022eec1c2727150076d5396d62050485ba3 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Sun, 14 Sep 2025 10:02:03 +0300 Subject: [PATCH 26/65] Add code examples for converting --- .../convert-images-format-to-pdf/_index.md | 13 ++ .../converting/convert-pdf-to-html/_index.md | 211 +++++++++++++++++- .../convert-pdf-to-images-format/_index.md | 91 ++++---- .../convert-pdf-to-powerpoint/_index.md | 72 ++++-- .../converting/convert-pdf-to-word/_index.md | 63 ++---- 5 files changed, 321 insertions(+), 129 deletions(-) diff --git a/en/python-net/converting/convert-images-format-to-pdf/_index.md b/en/python-net/converting/convert-images-format-to-pdf/_index.md index 9671e1a13..a425e0e7d 100644 --- a/en/python-net/converting/convert-images-format-to-pdf/_index.md +++ b/en/python-net/converting/convert-images-format-to-pdf/_index.md @@ -404,6 +404,12 @@ You can convert TIFF to PDF in the same manner as the rest raster file formats g ## Convert CDR to PDF +Next code snippet shows how to load a CorelDRAW (CDR) file and save it as a PDF using 'CdrLoadOptions' in Aspose.PDF for Python via .NET. + +1. Initialize load options for CDR format. +1. Load CDR file into Document object. +1. Save the document in PDF format. 
+ ```python from io import FileIO @@ -426,6 +432,13 @@ You can convert TIFF to PDF in the same manner as the rest raster file formats g ## Convert JPEG to PDF +This example shows how to create a new PDF document, add a blank A4-sized page, and insert an image into it using Aspose.PDF for Python via .NET. + +1. Create a new PDF document. +1. Add a new page. +1. Define the placement rectangle (A4 size: 595x842 points). +1. Insert the image into the page. +1. Save the PDF. ```python diff --git a/en/python-net/converting/convert-pdf-to-html/_index.md b/en/python-net/converting/convert-pdf-to-html/_index.md index 8dda7a16d..2cc2a4361 100644 --- a/en/python-net/converting/convert-pdf-to-html/_index.md +++ b/en/python-net/converting/convert-pdf-to-html/_index.md @@ -4,8 +4,8 @@ linktitle: Convert PDF to HTML format type: docs weight: 50 url: /python-net/convert-pdf-to-html/ -lastmod: "2025-02-27" -description: This topic show you how to convert PDF file to HTML format with Aspose.PDF for Python .NET library. +lastmod: "2025-09-27" +description: This topic show you how to convert PDF file to HTML format with Aspose.PDF for Python via .NET library. sitemap: changefreq: "monthly" priority: 0.8 @@ -23,7 +23,6 @@ _Format_: **HTML** - [Python Convert PDF to HTML](#python-pdf-to-html) - [Python How to convert PDF file to HTML](#python-pdf-to-html) - ## Convert PDF to HTML **Aspose.PDF for Python via .NET** provides many features for converting various file formats into PDF documents and converting PDF files into various output formats. This article discusses how to convert a PDF file into HTML. You can use just a couple of lines of code Python for converting PDF To HTML. You may need to convert PDF to HTML if you want to create a website or add content to an online forum. One way to convert PDF to HTML is to programmatically use Python. 
@@ -43,26 +42,218 @@ Aspose.PDF for Python presents you online free application ["PDF to HTML"](https ```python + from os import path import aspose.pdf as apdf - from io import FileIO + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + document = apdf.Document(path_infile) + save_options = apdf.HtmlSaveOptions() + document.save(path_outfile, save_options) + + print(infile + " converted into " + outfile) +``` + +### Convert PDF to HTML with Stored Images + +This function converts a PDF file into HTML format using Aspose.PDF for Python via .NET. All extracted images are stored in a specified folder instead of being embedded in the HTML file. + +1. Configure HTML save options. +1. Save as HTML with external images. + +```python + from os import path - import pydicom + import aspose.pdf as apdf + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + document = apdf.Document(path_infile) + save_options = apdf.HtmlSaveOptions() + save_options.special_folder_for_all_images = self.data_dir + document.save(path_outfile, save_options) + + print(infile + " converted into " + outfile) +``` - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) +### Convert PDF to Multi-Page HTML - # Open PDF document +This function converts a PDF file into multi-page HTML, where each PDF page is exported as a separate HTML file. This makes the output easier to navigate and reduces loading time for large PDFs. +1. Load the source PDF using 'apdf.Document'. +1. Create 'HtmlSaveOptions' and 'set split_into_pages'. +1. Save the document as HTML with pages split into separate files. +1. Print a confirmation message. 
+ +```python + + from os import path + import aspose.pdf as apdf + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) document = apdf.Document(path_infile) + save_options = apdf.HtmlSaveOptions() + save_options.split_into_pages = True + document.save(path_outfile, save_options) + + print(infile + " converted into " + outfile) +``` + +### Convert PDF to HTML with Stored SVG Images + +This function converts a PDF into HTML format while storing all images as SVG files in a specified folder, instead of embedding them directly in the HTML. + +1. Load the source PDF using 'apdf.Document'. +1. Create 'HtmlSaveOptions' and 'set special_folder_for_svg_images' to the target folder. +1. Save the document as HTML with external SVG images. +1. Print a confirmation message. + +```python - # save document in HTML format + from os import path + import aspose.pdf as apdf + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + document = apdf.Document(path_infile) save_options = apdf.HtmlSaveOptions() + save_options.special_folder_for_svg_images = self.data_dir + document.save(path_outfile, save_options) + + print(infile + " converted into " + outfile) +``` +### Convert PDF to HTML with Compressed SVG Images + +This snippet converts a PDF into HTML format, storing all images as SVG files in a specified folder and compressing them to reduce file size. + +1. Load the PDF document using 'apdf.Document'. +1. Create 'HtmlSaveOptions' and: + - Set 'special_folder_for_svg_images' to store SVG images externally. + - Enable 'compress_svg_graphics_if_any' to compress SVG images. +1. Save the document as HTML with compressed external SVG images. +1. Print a confirmation message. 
+ +```python + + from os import path + import aspose.pdf as apdf + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + document = apdf.Document(path_infile) + save_options = apdf.HtmlSaveOptions() + save_options.special_folder_for_svg_images = self.data_dir + save_options.compress_svg_graphics_if_any = True document.save(path_outfile, save_options) + + print(infile + " converted into " + outfile) +``` + +### Convert PDF to HTML with Embedded Raster Images + +This snippet converts a PDF into HTML format, embedding raster images as PNG page backgrounds. This approach preserves image quality and page layout within the HTML. + +1. Load the PDF document using 'apdf.Document'. +1. Create 'HtmlSaveOptions' and 'set raster_images_saving_mode' to 'AS_EMBEDDED_PARTS_OF_PNG_PAGE_BACKGROUND'. +1. Save the document as HTML with embedded raster images. +1. Print a confirmation message. + +```python + + from os import path + import aspose.pdf as apdf + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + document = apdf.Document(path_infile) + save_options = apdf.HtmlSaveOptions() + save_options.raster_images_saving_mode = apdf.HtmlSaveOptions.RasterImagesSavingModes.AS_EMBEDDED_PARTS_OF_PNG_PAGE_BACKGROUND + document.save(path_outfile, save_options) + + print(infile + " converted into " + outfile) +``` + +### Convert PDF to HTML (Body Content Only, Multi-Page) + +This function converts a PDF into HTML format, generating only the 'body' content without extra 'html' or 'head' tags, and splits the output into separate pages. + +1. Load the PDF document using 'apdf.Document'. +1. Create 'HtmlSaveOptions' and configure: + - 'html_markup_generation_mode = WRITE_ONLY_BODY_CONTENT' to generate only the 'body' content. + - 'split_into_pages' to create separate HTML files for each PDF page. +1. Save the document as HTML with the specified options. +1. 
Print a confirmation message. + +```python + + from os import path + import aspose.pdf as apdf + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + document = apdf.Document(path_infile) + save_options = apdf.HtmlSaveOptions() + save_options.html_markup_generation_mode = apdf.HtmlSaveOptions.HtmlMarkupGenerationModes.WRITE_ONLY_BODY_CONTENT + save_options.split_into_pages = True + document.save(path_outfile, save_options) + + print(infile + " converted into " + outfile) +``` + +### Convert PDF to HTML with Transparent Text Rendering + +This function converts a PDF into HTML format, rendering all text as transparent, including shadowed texts, which preserves visual fidelity while allowing flexible styling in the output HTML. + +1. Load the PDF document using 'apdf.Document'. +1. Create 'HtmlSaveOptions' and configure: + - 'save_transparent_texts' to render normal text as transparent. + - 'save_shadowed_texts_as_transparent_texts' to render shadowed text as transparent. +1. Save the document as HTML with transparent text rendering. +1. Print a confirmation message. + +```python + + from os import path + import aspose.pdf as apdf + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + document = apdf.Document(path_infile) + save_options = apdf.HtmlSaveOptions() + save_options.save_transparent_texts = True + save_options.save_shadowed_texts_as_transparent_texts = True + document.save(path_outfile, save_options) + + print(infile + " converted into " + outfile) +``` + +### Convert PDF to HTML with Document Layers Rendering + +This function converts a PDF into HTML format, preserving document layers by converting marked content into separate layers in the output HTML. This allows layered elements (like annotations, backgrounds, and overlays) to be rendered accurately. + +1. Load the PDF document using 'apdf.Document'. +1. 
Create 'HtmlSaveOptions' and enable 'convert_marked_content_to_layers' to preserve layers. +1. Save the document as HTML with layered content. +1. Print a confirmation message. + +```python + + from os import path + import aspose.pdf as apdf + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + document = apdf.Document(path_infile) + save_options = apdf.HtmlSaveOptions() + save_options.convert_marked_content_to_layers = True + document.save(path_outfile, save_options) + + print(infile + " converted into " + outfile) ``` -## See Also +## See Also This article also covers these topics. The codes are same as above. diff --git a/en/python-net/converting/convert-pdf-to-images-format/_index.md b/en/python-net/converting/convert-pdf-to-images-format/_index.md index 113f26d34..52468db91 100644 --- a/en/python-net/converting/convert-pdf-to-images-format/_index.md +++ b/en/python-net/converting/convert-pdf-to-images-format/_index.md @@ -4,7 +4,7 @@ linktitle: Convert PDF to Images type: docs weight: 70 url: /python-net/convert-pdf-to-images-format/ -lastmod: "2025-02-27" +lastmod: "2025-09-27" description: Explore how to convert PDF pages into images such as PNG, JPEG, or TIFF using Aspose.PDF in Python via .NET. 
sitemap: changefreq: "monthly" @@ -88,24 +88,23 @@ The following code snippet shows how to convert all the PDF pages to a single TI ```python + from os import path import aspose.pdf as apdf from io import FileIO - from os import path - import pydicom - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) document = apdf.Document(path_infile) - resolution = apdf.devices.Resolution(300) + resolution = apdf.devices.Resolution(300) tiffSettings = apdf.devices.TiffSettings() tiffSettings.compression = apdf.devices.CompressionType.LZW tiffSettings.depth = apdf.devices.ColorDepth.DEFAULT tiffSettings.skip_blank_pages = False tiffDevice = apdf.devices.TiffDevice(resolution, tiffSettings) - tiffDevice.Process(document, path_outfile) + tiffDevice.process(document, path_outfile) print(infile + " converted into " + outfile) ``` @@ -129,9 +128,9 @@ Let's take a look at how to convert a PDF page to an image. - -The following steps and code snippet in Python shows this possibility - + +The following steps and code snippet in Python shows this possibility: + - [Convert PDF to BMP in Python](#python-pdf-to-image) - [Convert PDF to EMF in Python](#python-pdf-to-image) - [Convert PDF to JPG in Python](#python-pdf-to-image) @@ -147,28 +146,28 @@ The following steps and code snippet in Python shows this possibility * [JpegDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/jpegdevice/) (to convert PDF to JPG) * [PngDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/pngdevice/) (to convert PDF to PNG) * [GifDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/gifdevice/) (to convert PDF to GIF) -3. Call the [ImageDevice.Process()](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/imagedevice/#methods) method to perform PDF to Image conversion. +3. 
Call the [device.process()](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/imagedevice/#methods) method to perform PDF to Image conversion. ### Convert PDF to BMP ```python + from os import path import aspose.pdf as apdf from io import FileIO - from os import path - import pydicom - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) - options = apdf.CgmLoadOptions() - document = apdf.Document(path_infile, options) + document = apdf.Document(path_infile) resolution = apdf.devices.Resolution(300) - device = apdf.devices.BmpDevice(resolution) - image_stream = FileIO(path_outfile, "w") - device.process(document.pages[1], image_stream) - image_stream.close() + page_count = 1 + while page_count <= len(document.pages): + image_stream = FileIO(path_outfile + str(page_count) + "_out.bmp", "w") + device.process(document.pages[page_count], image_stream) + image_stream.close() + page_count = page_count + 1 print(infile + " converted into " + outfile) ``` @@ -177,13 +176,12 @@ The following steps and code snippet in Python shows this possibility ```python + from os import path import aspose.pdf as apdf from io import FileIO - from os import path - import pydicom - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) document = apdf.Document(path_infile) resolution = apdf.devices.Resolution(300) @@ -202,13 +200,12 @@ The following steps and code snippet in Python shows this possibility ```python + from os import path import aspose.pdf as apdf from io import FileIO - from os import path - import pydicom - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = 
path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) document = apdf.Document(path_infile) resolution = apdf.devices.Resolution(300) @@ -221,20 +218,19 @@ The following steps and code snippet in Python shows this possibility image_stream.close() page_count = page_count + 1 - print(infile + " converted into " + outfile) + print(infile + " converted into " + outfile) ``` ### Convert PDF to PNG ```python + from os import path import aspose.pdf as apdf from io import FileIO - from os import path - import pydicom - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) document = apdf.Document(path_infile) resolution = apdf.devices.Resolution(300) @@ -254,13 +250,12 @@ The following steps and code snippet in Python shows this possibility ```python + from os import path import aspose.pdf as apdf from io import FileIO - from os import path - import pydicom - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) document = apdf.Document(path_infile) resolution = apdf.devices.Resolution(300) @@ -301,7 +296,7 @@ Aspose.PDF for Python via .NET presents you online free application ["PDF to SVG SVG images and their behaviors are defined in XML text files. This means that they can be searched, indexed, scripted and if required, compressed. As XML files, SVG images can be created and edited with any text editor, but it is often more convenient to create them with drawing programs such as Inkscape. -Aspose.PDF for Python supports the feature to convert SVG image to PDF format and also offers the capability to convert PDF files to SVG format. 
To accomplish this requirement, the [SvgSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/svgsaveoptions/) class has been introduced into the Aspose.PDF namespace. Instantiate an object of SvgSaveOptions and pass it as a second argument to the [Document.Save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method. +Aspose.PDF for Python supports the feature to convert SVG image to PDF format and also offers the capability to convert PDF files to SVG format. To accomplish this requirement, the [SvgSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/svgsaveoptions/) class has been introduced into the Aspose.PDF namespace. Instantiate an object of SvgSaveOptions and pass it as a second argument to the [document.save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method. The following code snippet shows the steps for converting a PDF file to SVG format with Python. @@ -309,35 +304,25 @@ The following code snippet shows the steps for converting a PDF file to SVG form 1. Create an object of the [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) class. 2. Create [SvgSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/svgsaveoptions/) object with needed settings. -3. Call the [Document.Save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method and pass it [SvgSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/svgsaveoptions/) object convert the PDF document to SVG. +3. Call the [document.save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method and pass it the [SvgSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/svgsaveoptions/) object to convert the PDF document to SVG. 
### Convert PDF to SVG ```python + from os import path import aspose.pdf as apdf from io import FileIO - from os import path - import pydicom - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) - - # Open PDF document + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) document = apdf.Document(path_infile) - # Instantiate an object of SvgSaveOptions - save_options = apdf.SvgSaveOptions() - - # Do not compress SVG image to Zip archive - save_options.compress_output_to_zip_archive = False save_options.treat_target_file_name_as_directory = True - # Save the output in SVG files - document.save(path_outfile, save_options) print(infile + " converted into " + outfile) ``` diff --git a/en/python-net/converting/convert-pdf-to-powerpoint/_index.md b/en/python-net/converting/convert-pdf-to-powerpoint/_index.md index ad75e74b0..26a0041f9 100644 --- a/en/python-net/converting/convert-pdf-to-powerpoint/_index.md +++ b/en/python-net/converting/convert-pdf-to-powerpoint/_index.md @@ -5,7 +5,7 @@ type: docs weight: 30 url: /python-net/convert-pdf-to-powerpoint/ description: Learn how to easily convert PDFs to PowerPoint presentations using Aspose.PDF for Python via .NET. Step-by-step guide for seamless PDF to PPTX transformation. -lastmod: "2025-02-27" +lastmod: "2025-09-27" sitemap: changefreq: "monthly" priority: 0.7 @@ -37,25 +37,23 @@ We have an API named Aspose.Slides which offers the feature to create as well as During PDF to PPTX conversion, the text is rendered as Text where you can select/update it. Please note that in order to convert PDF files to PPTX format, Aspose.PDF provides a class named [PptxSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/pptxsaveoptions/). An object of the PptxSaveOptions class is passed as a second argument to the [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods). 
The following code snippet shows the process for converting PDF files into PPTX format. -## Simple conversion PDF to PowerPoint using Python and Aspose.PDF for Python +## Simple conversion PDF to PowerPoint using Python and Aspose.PDF for Python via .NET In order to convert PDF to PPTX, Aspose.PDF for Python advice to use the following code steps. Steps: Convert PDF to PowerPoint in Python | Steps: Convert PDF to PPTX in Python -1. Create an instance of [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) class -2. Create an instance of [PptxSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/pptxsaveoptions/) class -3. Use the **Save** method of the **Document** object to save the PDF as PPTX +1. Create an instance of [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) class. +1. Create an instance of [PptxSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/pptxsaveoptions/) class. +1. Call the [document.save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method. ```python - import aspose.pdf as apdf - from io import FileIO from os import path - import pydicom + import aspose.pdf as apdf - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) document = apdf.Document(path_infile) save_options = apdf.PptxSaveOptions() @@ -69,7 +67,7 @@ In order to convert PDF to PPTX, Aspose.PDF for Python advice to use the followi {{% alert color="success" %}} **Try to convert PDF to PowerPoint online** -Aspose.PDF for Python presents you online free application ["PDF to PPTX"](https://products.aspose.app/pdf/conversion/pdf-to-pptx), where you may try to investigate the functionality and quality it works. 
+Aspose.PDF for Python via .NET presents you online free application ["PDF to PPTX"](https://products.aspose.app/pdf/conversion/pdf-to-pptx), where you may try to investigate the functionality and quality it works. [![Aspose.PDF Convertion PDF to PPTX with Free App](pdf_to_pptx.png)](https://products.aspose.app/pdf/conversion/pdf-to-pptx) {{% /alert %}} @@ -78,20 +76,48 @@ In case if you need to convert a searchable PDF to PPTX as images instead of sel ```python - import aspose.pdf as ap - - input_pdf = DIR_INPUT + "sample.pdf" - output_pdf = DIR_OUTPUT + "convert_pdf_to_pptx_with_slides_as_images.pptx" - # Open PDF document - document = ap.Document(input_pdf) - # Instantiate PptxSaveOptions instance - save_option = ap.PptxSaveOptions() - save_option.slides_as_images = True - # Save the file into MS PowerPoint format - document.save(output_pdf, save_option) + from os import path + import aspose.pdf as apdf + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + document = apdf.Document(path_infile) + save_options = apdf.PptxSaveOptions() + save_options.slides_as_images = True + + document.save(path_outfile, save_options) + + print(infile + " converted into " + outfile) +``` + +## Convert PDF to PPTX with Custom Image Resolution + +This method converts a PDF document into a PowerPoint (PPTX) file while setting a custom image resolution (300 DPI) for improved quality. + +1. Load the PDF into an 'apdf.Document' object. +1. Create a 'PptxSaveOptions' instance. +1. Set the 'image_resolution' property to 300 DPI for high-quality rendering. +1. Save the PDF as a PPTX file using the defined save options. 
+ +```python + + from os import path + import aspose.pdf as apdf + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + document = apdf.Document(path_infile) + save_options = apdf.PptxSaveOptions() + save_options.image_resolution = 300 + + document.save(path_outfile, save_options) + + print(infile + " converted into " + outfile) ``` -## See Also +## See Also This article also covers these topics. The codes are same as above. diff --git a/en/python-net/converting/convert-pdf-to-word/_index.md b/en/python-net/converting/convert-pdf-to-word/_index.md index d6ce4d96a..daef56eb4 100644 --- a/en/python-net/converting/convert-pdf-to-word/_index.md +++ b/en/python-net/converting/convert-pdf-to-word/_index.md @@ -4,7 +4,7 @@ linktitle: Convert PDF to Word 2003/2019 type: docs weight: 10 url: /python-net/convert-pdf-to-word/ -lastmod: "2025-02-27" +lastmod: "2025-09-27" description: Learn how to convert PDF documents to Word format in Python using Aspose.PDF for easy document editing. sitemap: changefreq: "monthly" @@ -33,51 +33,27 @@ _Format_: **Word** - [Python Convert PDF to Word](#python-pdf-to-doc) - [Python How to convert PDF file to Word](#python-pdf-to-docx) +## Convert PDF to DOC -## Python PDF to DOC and DOCX Conversion +One of the most popular features is the PDF to Microsoft Word DOC conversion, which makes content management easier. **Aspose.PDF for Python via .NET** allows you to convert PDF files not only to DOC but also to DOCX format, easily and efficiently. -One of the most popular features is the PDF to Microsoft Word DOC conversion, which makes content management easier. **Aspose.PDF for Python** allows you to convert PDF files not only to DOC but also to DOCX format, easily and efficiently. - -## Convert PDF to DOC (Word 97-2003) file - -Convert PDF file to DOC format with ease and full control. Aspose.PDF for Python is flexible and supports a wide variety of conversions. 
Converting pages from PDF documents to images, for example, is a very popular feature. - -A conversion that many of our customers have requested is PDF to DOC: converting a PDF file to a Microsoft Word document. Customers want this because PDF files cannot easily be edited, whereas Word documents can. Some companies want their users to be able to manipulate text, tables and images in files that started as PDFs. - -Keeping alive the tradition of making things simple and understandable, Aspose.PDF for Python lets you transform a source PDF file into a DOC file with two lines of code. To accomplish this feature, we have introduced an enumeration named SaveFormat and its value .Doc lets you save the source file to Microsoft Word format. - -The following Python code snippet shows the process of converting a PDF file into DOC format. +The [DocSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/docsaveoptions/) class provides numerous properties that improve the process of converting PDF files to DOC format. Among these properties, Mode enables you to specify the recognition mode for PDF content. You can specify any value from the RecognitionMode enumeration for this property. Each of these values has specific benefits and limitations: Steps: Convert PDF to DOC in Python -1. Create an instance of [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) object with the source PDF document. -2. Save it to [SaveFormat](https://reference.aspose.com/pdf/python-net/aspose.pdf/saveformat/) format by calling [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method. 
- -```python - - import aspose.pdf as ap - - input_pdf = DIR_INPUT + "sample.pdf" - output_pdf = DIR_OUTPUT + "convert_pdf_to_doc.doc" - # Open PDF document - document = ap.Document(input_pdf) - # Save the file into MS Word document format - document.save(output_pdf, ap.SaveFormat.DOC) -``` - -### Using the DocSaveOptions Class - -The [DocSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/docsaveoptions/) class provides numerous properties that improve the process of converting PDF files to DOC format. Among these properties, Mode enables you to specify the recognition mode for PDF content. You can specify any value from the RecognitionMode enumeration for this property. Each of these values has specific benefits and limitations: +1. Load the PDF into an 'apdf.Document' object. +1. Create a 'DocSaveOptions' instance. +1. Set the format property to 'DocFormat.DOC' to ensure the output is in .doc format (older Word format). +1. Save the PDF as a Word document using the specified save options. +1. Print a confirmation message. ```python - import aspose.pdf as apdf - from io import FileIO from os import path - import pydicom + import aspose.pdf as apdf - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) document = apdf.Document(path_infile) save_options = apdf.DocSaveOptions() @@ -103,18 +79,19 @@ The following Python code snippet shows the process of converting a PDF file int Steps: Convert PDF to DOCX in Python -1. Create an instance of [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) object with the source PDF document. -2. Save it to [SaveFormat](https://reference.aspose.com/pdf/python-net/aspose.pdf/saveformat/) format by calling [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method. +1. 
Load the source PDF using 'apdf.Document'. +1. Create an instance of 'DocSaveOptions'. +1. Set the format property to 'DocFormat.DOC_X' to generate a .docx file (modern Word format). +1. Save the PDF as a DOCX file with the configured save options. +1. Print a confirmation message after conversion. ```python - import aspose.pdf as apdf - from io import FileIO from os import path - import pydicom + import aspose.pdf as apdf - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) document = apdf.Document(path_infile) save_options = apdf.DocSaveOptions() From 6831157f717c7c53f3d4d9e52fe288d26043fe14 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Wed, 17 Sep 2025 09:37:55 +0300 Subject: [PATCH 27/65] Update en/python-net/advanced-operations/working-with-documents/_index.md Co-authored-by: Andriy Andruhovski --- .../advanced-operations/working-with-documents/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/advanced-operations/working-with-documents/_index.md b/en/python-net/advanced-operations/working-with-documents/_index.md index f9d0ec724..e04fd606e 100644 --- a/en/python-net/advanced-operations/working-with-documents/_index.md +++ b/en/python-net/advanced-operations/working-with-documents/_index.md @@ -27,4 +27,4 @@ You are able to do the following: - [Merge PDF](/pdf/python-net/merge-pdf-documents/) - merge multiple PDF files into a single PDF document using Python. - [Split PDF](/pdf/python-net/split-document/) - split PDF pages into individual PDF files in your Python applications. - [Working with Headings](/pdf/python-net/working-with-headings/) - you can create numbering in heading your PDF document with Python. 
-- [Work with PDF layers](/pdf/python-net/work-with-pdf-layers/) - how to lock a PDF layer, extract PDF layer elements, flatten a layered PDF, and merge all layers inside PDF into one. +- [Working with PDF layers](/pdf/python-net/working-with-pdf-layers/) - how to manage and manipulate PDF layers using the Aspose.PDF for Python via .NET library. From b1dae308ffe4da0911e9169050edeb84f51817a1 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Wed, 17 Sep 2025 09:38:03 +0300 Subject: [PATCH 28/65] Update en/python-net/advanced-operations/working-with-documents/working-with-layers/_index.md Co-authored-by: Andriy Andruhovski --- .../working-with-documents/working-with-layers/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/advanced-operations/working-with-documents/working-with-layers/_index.md b/en/python-net/advanced-operations/working-with-documents/working-with-layers/_index.md index 245beeb0c..e54be37da 100644 --- a/en/python-net/advanced-operations/working-with-documents/working-with-layers/_index.md +++ b/en/python-net/advanced-operations/working-with-documents/working-with-layers/_index.md @@ -3,7 +3,7 @@ title: Work with PDF layers using Python linktitle: Work with PDF layers type: docs weight: 50 -url: /net/work-with-pdf-layers/ +url: /net/working-with-pdf-layers/ description: The next task explains how to lock a PDF layer, extract PDF layer elements, flatten a layered PDF, and merge all layers inside PDF into one. 
lastmod: "2025-09-17" sitemap: From b34b5bf2f245377753a94a254d0dd2fc761f0b09 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Wed, 17 Sep 2025 10:03:37 +0300 Subject: [PATCH 29/65] Update lastmod dates and add code examples for converting PDF to PNG in Python --- .../convert-pdf-to-images-format/_index.md | 33 +++- .../convert-pdf-to-other-files/_index.md | 144 ++++++++++-------- .../converting/convert-pdf-to-pdfa/_index.md | 2 +- 3 files changed, 114 insertions(+), 65 deletions(-) diff --git a/en/python-net/converting/convert-pdf-to-images-format/_index.md b/en/python-net/converting/convert-pdf-to-images-format/_index.md index 52468db91..60088d1ba 100644 --- a/en/python-net/converting/convert-pdf-to-images-format/_index.md +++ b/en/python-net/converting/convert-pdf-to-images-format/_index.md @@ -244,7 +244,38 @@ The following steps and code snippet in Python shows this possibility: page_count = page_count + 1 print(infile + " converted into " + outfile) -``` +``` + +### Convert PDF to PNG with default font + +```python + + from os import path + import aspose.pdf as apdf + from io import FileIO + + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + document = apdf.Document(path_infile) + resolution = apdf.devices.Resolution(300) + + rendering_options = apdf.RenderingOptions() + rendering_options.default_font_name = "Arial" + + device = apdf.devices.PngDevice(resolution) + device.rendering_options = rendering_options + + page_count = 1 + while page_count <= len(document.pages): + image_stream = FileIO(path_outfile + str(page_count) + "_out.png", "w") + device.process(document.pages[page_count], image_stream) + image_stream.close() + page_count = page_count + 1 + + print(infile + " converted into " + outfile) +``` ### Convert PDF to GIF diff --git a/en/python-net/converting/convert-pdf-to-other-files/_index.md 
b/en/python-net/converting/convert-pdf-to-other-files/_index.md index e625462a9..3c4e6533d 100644 --- a/en/python-net/converting/convert-pdf-to-other-files/_index.md +++ b/en/python-net/converting/convert-pdf-to-other-files/_index.md @@ -4,7 +4,7 @@ linktitle: Convert PDF to other formats type: docs weight: 90 url: /python-net/convert-pdf-to-other-files/ -lastmod: "2025-02-27" +lastmod: "2025-09-27" description: This topic shows you how to convert PDF file to other file formats like EPUB, LaTeX, Text, XPS etc using Python. sitemap: changefreq: "monthly" @@ -27,22 +27,22 @@ Aspose.PDF for Python presents you online free application ["PDF to EPUB"](https **EPUB** is a free and open e-book standard from the International Digital Publishing Forum (IDPF). Files have the extension .epub. EPUB is designed for reflowable content, meaning that an EPUB reader can optimize text for a particular display device. EPUB also supports fixed-layout content. The format is intended as a single format that publishers and conversion houses can use in-house, as well as for distribution and sale. It supersedes the Open eBook standard. -Aspose.PDF for Python also supports the feature to convert PDF documents to EPUB format. Aspose.PDF for Python has a class named 'EpubSaveOptions' which can be used as the second argument to [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method, to generate an EPUB file. +Aspose.PDF for Python also supports the feature to convert PDF documents to EPUB format. Aspose.PDF for Python has a class named 'EpubSaveOptions' which can be used as the second argument to [document.save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method, to generate an EPUB file. Please try using the following code snippet to accomplish this requirement with Python. 
```python - import aspose.pdf as apdf - from io import FileIO from os import path - import pydicom + import aspose.pdf as ap - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) - document = apdf.Document(path_infile) - save_options = apdf.EpubSaveOptions() - save_options.content_recognition_mode = apdf.EpubSaveOptions.RecognitionMode.FLOW + document = ap.Document(path_infile) + save_options = ap.EpubSaveOptions() + save_options.content_recognition_mode = ( + ap.EpubSaveOptions.RecognitionMode.FLOW + ) document.save(path_outfile, save_options) print(infile + " converted into " + outfile) @@ -67,25 +67,14 @@ The following code snippet shows the process of converting PDF files into the TE ```python - import aspose.pdf as apdf - from io import FileIO from os import path - import pydicom + import aspose.pdf as ap - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) - # Open PDF document - - document = apdf.Document(path_infile) - - # Instantiate an object of SvgSaveOptions - - save_options = apdf.SvgSaveOptions() - - # Instantiate an object of LaTeXSaveOptions - - save_options = apdf.LaTeXSaveOptions() + document = ap.Document(path_infile) + save_options = ap.LaTeXSaveOptions() document.save(path_outfile, save_options) print(infile + " converted into " + outfile) @@ -93,26 +82,18 @@ The following code snippet shows the process of converting PDF files into the TE ## Convert PDF to Text -**Aspose.PDF for Python** support converting whole PDF document and single page to a Text file. - -### Convert PDF document to Text file - -You can convert PDF document to TXT file using 'TextDevice' class. 
- -The following code snippet explains how to extract the texts from the all pages. +**Aspose.PDF for Python** supports converting a whole PDF document or a single page to a text file. You can convert a PDF document to a TXT file using the 'TextDevice' class. The following code snippet explains how to extract the text from all the pages. ```python - import aspose.pdf as apdf - from io import FileIO from os import path - import pydicom + import aspose.pdf as ap - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) - document = apdf.Document(path_infile) - device = apdf.devices.TextDevice() + document = ap.Document(path_infile) + device = ap.devices.TextDevice() device.process(document.pages[1], path_outfile) print(infile + " converted into " + outfile) @@ -140,26 +121,78 @@ Aspose.PDF for Python presents you online free application ["PDF to XPS"](https: The XPS file type is primarily associated with the XML Paper Specification by Microsoft Corporation. The XML Paper Specification (XPS), formerly codenamed Metro and subsuming the Next Generation Print Path (NGPP) marketing concept, is Microsoft's initiative to integrate document creation and viewing into the Windows operating system. -To convert PDF files to XPS, Aspose.PDF has the class [XpsSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/xpssaveoptions/) that is used as the second argument to the [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method to generate the XPS file. +To convert PDF files to XPS, Aspose.PDF has the class [XpsSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/xpssaveoptions/) that is used as the second argument to the [document.save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method to generate the XPS file. 
The following code snippet shows the process of converting PDF file into XPS format. ```python - import aspose.pdf as apdf - from io import FileIO from os import path - import pydicom + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + document = ap.Document(path_infile) + save_options = ap.XpsSaveOptions() + save_options.use_new_imaging_engine = True + document.save(path_outfile, save_options) + + print(infile + " converted into " + outfile) +``` + +## Convert PDF to MD + +Aspose.PDF has the class 'MarkdownSaveOptions', which converts a PDF document into Markdown (MD) format while preserving images and resources. + +1. Load the source PDF using 'ap.Document'. +1. Create an instance of 'MarkdownSaveOptions'. +1. Set 'resources_directory_name' to 'images' – extracted images will be stored in this folder. +1. Save the converted Markdown document using the configured options. +1. Print a confirmation message after conversion. + +```python + + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) - document = apdf.Document(path_infile) - save_options = apdf.XpsSaveOptions() + document = ap.Document(path_infile) + save_options = ap.MarkdownSaveOptions() + # save_options.extract_vector_graphics = True + save_options.resources_directory_name = "images" + save_options.use_image_html_tag = True document.save(path_outfile, save_options) print(infile + " converted into " + outfile) ``` +The result is a Markdown file with text and linked images stored in the specified images folder. + +## Convert PDF to MobiXML + +This method converts a PDF document into the MOBI (MobiXML) format, which is commonly used for eBooks on Kindle devices. + +1. Load the source PDF document using 'ap.Document'. +1. 
Save the document with the format 'ap.SaveFormat.MOBI_XML'. +1. Print a confirmation message once the conversion is complete. + +```python + + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + document = ap.Document(path_infile) + document.save(path_outfile, ap.SaveFormat.MOBI_XML) + + print(infile + " converted into " + outfile) +``` + ## Convert PDF to XML {{% alert color="success" %}} @@ -175,19 +208,4 @@ Aspose.PDF for Python presents you online free application ["PDF to XML"](https: Aspose.PDF for Python also supports the feature to convert PDF documents to XML format. Aspose.PDF for Python has a class named 'XmlSaveOptions' which can be used as the second argument to [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method, to generate an XML file. Please try using the following code snippet to accomplish this requirement with Python. -```python - - import aspose.pdf as apdf - from io import FileIO - from os import path - import pydicom - - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) - - document = apdf.Document(path_infile) - save_options = apdf.PdfXmlSaveOptions() - document.save(path_outfile, save_options) - print(infile + " converted into " + outfile) -``` diff --git a/en/python-net/converting/convert-pdf-to-pdfa/_index.md b/en/python-net/converting/convert-pdf-to-pdfa/_index.md index 61aaf0af8..1603b5d74 100644 --- a/en/python-net/converting/convert-pdf-to-pdfa/_index.md +++ b/en/python-net/converting/convert-pdf-to-pdfa/_index.md @@ -4,7 +4,7 @@ linktitle: Convert PDF to PDF/A formats type: docs weight: 100 url: /python-net/convert-pdf-to-pdfa/ -lastmod: "2025-02-27" +lastmod: "2025-09-27" description: Learn how to convert PDF files to PDF/A format for compliance with archiving standards using Aspose.PDF in Python via .NET. 
sitemap: changefreq: "monthly" From 9f2b211a8a66f97715b0ba675ad730835ff1db95 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Wed, 17 Sep 2025 13:07:02 +0300 Subject: [PATCH 30/65] Add conversion guides for PDF/x and PDF/A formats in Python --- en/python-net/converting/_index.md | 4 + .../converting/convert-pdf-to-pdfa/_index.md | 60 ---- .../converting/convert-pdf-to-pdfx/_index.md | 307 ++++++++++++++++++ .../pdf_to_pdfa.png | Bin .../converting/convert-pdfx-to-pdf/_index.md | 70 ++++ 5 files changed, 381 insertions(+), 60 deletions(-) delete mode 100644 en/python-net/converting/convert-pdf-to-pdfa/_index.md create mode 100644 en/python-net/converting/convert-pdf-to-pdfx/_index.md rename en/python-net/converting/{convert-pdf-to-pdfa => convert-pdf-to-pdfx}/pdf_to_pdfa.png (100%) create mode 100644 en/python-net/converting/convert-pdfx-to-pdf/_index.md diff --git a/en/python-net/converting/_index.md b/en/python-net/converting/_index.md index 6aa69434e..749ba6609 100644 --- a/en/python-net/converting/_index.md +++ b/en/python-net/converting/_index.md @@ -64,6 +64,10 @@ If honestly, externally, it is very difficult to determine if it is PDF or PDF/A - [Convert other file formats to PDF](/pdf/python-net/convert-other-files-to-pdf/) - this topic describes conversion with various formats like EPUB, XPS, Postscript, text and others. +- [Convert PDF/x to PDF](/pdf/python-net/convert-pdf_x-to-pdf/) - this topic describes converting PDF/UA and PDF/A to PDF. + +- [Convert PDF to PDF/x](/pdf/python-net/convert-pdf-to-pdf_x/) - this topic describes converting PDF to PDF/A, PDF/E and PDF/X formats. 
+ ## Try to convert PDF files online {{% alert color="success" %}} diff --git a/en/python-net/converting/convert-pdf-to-pdfa/_index.md b/en/python-net/converting/convert-pdf-to-pdfa/_index.md deleted file mode 100644 index 1603b5d74..000000000 --- a/en/python-net/converting/convert-pdf-to-pdfa/_index.md +++ /dev/null @@ -1,60 +0,0 @@ ---- -title: Convert PDF to PDF/A formats in Python -linktitle: Convert PDF to PDF/A formats -type: docs -weight: 100 -url: /python-net/convert-pdf-to-pdfa/ -lastmod: "2025-09-27" -description: Learn how to convert PDF files to PDF/A format for compliance with archiving standards using Aspose.PDF in Python via .NET. -sitemap: - changefreq: "monthly" - priority: 0.8 -TechArticle: true -AlternativeHeadline: How to Convert PDF to PDF/A formats in Python -Abstract: This article outlines the process of converting a PDF file to a PDF/A compliant format using Aspose.PDF for Python. The procedure includes validating the original PDF file according to Adobe Preflight standards, as various tools interpret PDF/A conformance differently. Once validated, the PDF is converted using the `Document` class's `Convert` method, with the validation results stored in an XML file. The conversion process allows for handling unconvertible elements via the `ConvertErrorAction` enumeration. Additionally, Aspose.PDF offers an online tool for converting PDFs to PDF/A-1A, enabling users to test the functionality and quality of the conversion process. A Python code snippet is provided to demonstrate converting a PDF to PDF/A-1b, illustrating the necessary steps and functions involved in the conversion. ---- - -**Aspose.PDF for Python** allows you to convert a PDF file to a PDF/A compliant PDF file. Before doing so, the file must be validated. This topic explains how. - -{{% alert color="primary" %}} - -Please note we follow Adobe Preflight for validating PDF/A conformance. All tools on the market have their own “representation” of PDF/A conformance. 
Please check this article on PDF/A validation tools for reference. We chose Adobe products for verifying how Aspose.PDF produces PDF files because Adobe is at the center of everything connected to PDF. - -{{% /alert %}} - -Convert the file using the Document class Convert method. Before converting the PDF to PDF/A compliant file, validate the PDF using the Validate method. The validation result is stored in an XML file and then this result is also passed to the Convert method. You can also specify the action for the elements which cannot be converted using the ConvertErrorAction enumeration. - -{{% alert color="success" %}} -**Try to convert PDF to PDF/A online** - -Aspose.PDF for Python presents you online free application ["PDF to PDF/A-1A"](https://products.aspose.app/pdf/conversion/pdf-to-pdfa1a), where you may try to investigate the functionality and quality it works. - -[![Aspose.PDF Convertion PDF to PDF/A with Free App](pdf_to_pdfa.png)](https://products.aspose.app/pdf/conversion/pdf-to-pdfa1a) -{{% /alert %}} - - -## Convert PDF file to PDF/A-1b - -The following code snippet shows how to convert PDF files to PDF/A-1b compliant PDF. 
- -```python - - import aspose.pdf as apdf - from io import FileIO - from os import path - import pydicom - - path_infile = path.join(self.dataDir, infile) - path_outfile = path.join(self.dataDir, "python", outfile) - - document = apdf.Document(path_infile) - document.convert( - self.dataDir + "pdf_pdfa.log", - apdf.PdfFormat.PDF_A_1B, - apdf.ConvertErrorAction.DELETE, - ) - document.save(path_outfile) - - print(infile + " converted into " + outfile) -``` - diff --git a/en/python-net/converting/convert-pdf-to-pdfx/_index.md b/en/python-net/converting/convert-pdf-to-pdfx/_index.md new file mode 100644 index 000000000..532374225 --- /dev/null +++ b/en/python-net/converting/convert-pdf-to-pdfx/_index.md @@ -0,0 +1,307 @@ +--- +title: Convert PDF to PDF/x formats in Python +linktitle: Convert PDF to PDF/x formats +type: docs +weight: 120 +url: /python-net/convert-pdf-to-pdf_x/ +lastmod: "2025-09-27" +description: This topic shows you how to convert PDF to PDF/x formats using Aspose.PDF for Python via .NET. +sitemap: + changefreq: "monthly" + priority: 0.8 +TechArticle: true +AlternativeHeadline: How to convert PDF to PDF/x formats +Abstract: The article provides a comprehensive guide on converting PDF to PDF/A, PDF/E and PDF/X formats using Aspose.PDF for Python. +--- + +**PDF to PDF/x format means the ability to convert PDF to additional formats, namely PDF/A, PDF/E and PDF/X.** + +## Convert PDF to PDF/A + +**Aspose.PDF for Python** allows you to convert a PDF file to a PDF/A compliant PDF file. Before doing so, the file must be validated. This topic explains how. + +{{% alert color="primary" %}} + +Please note we follow Adobe Preflight for validating PDF/A conformance. All tools on the market have their own “representation” of PDF/A conformance. Please check this article on PDF/A validation tools for reference. We chose Adobe products for verifying how Aspose.PDF produces PDF files because Adobe is at the center of everything connected to PDF. 
+ +{{% /alert %}} + +Convert the file using the 'convert' method of the Document class. Before converting the PDF to a PDF/A compliant file, validate the PDF using the 'validate' method. The validation result is stored in an XML file, and this result is also passed to the 'convert' method. You can also specify the action for the elements which cannot be converted using the ConvertErrorAction enumeration. + +{{% alert color="success" %}} +**Try to convert PDF to PDF/A online** + +Aspose.PDF for Python offers a free online application, ["PDF to PDF/A-1A"](https://products.aspose.app/pdf/conversion/pdf-to-pdfa1a), where you can try out the functionality and evaluate the quality of the conversion. + +[![Aspose.PDF Conversion PDF to PDF/A with Free App](pdf_to_pdfa.png)](https://products.aspose.app/pdf/conversion/pdf-to-pdfa1a) +{{% /alert %}} + +The 'document.validate()' method validates whether a PDF file conforms to the PDF/A-1B standard (an ISO-standardized version of PDF designed for long-term archiving). The validation results are saved in a log file. + +1. Load the PDF document using 'ap.Document'. +1. Call the validate method with the target compliance level (ap.PdfFormat.PDF_A_1B). +1. The results of the validation are written into the specified log file. + +```python + + path_infile = path.join(self.data_dir, infile) + path_logfile = path.join(self.data_dir, "python", outfile) + + document = ap.Document(path_infile) + document.validate(path_logfile, ap.PdfFormat.PDF_A_1B) +``` + +### Convert PDF to PDF/A-1B + +The following code snippet shows how to convert PDF files to PDF/A-1B format: + +1. Load the PDF document using 'ap.Document'. +1. Call the convert method with the following parameters: + - Log file path - stores the details of the conversion process and compliance checks. + - Target format - 'ap.PdfFormat.PDF_A_1B' (archival standard). + - Error action - 'ap.ConvertErrorAction.DELETE' — automatically removes elements that prevent compliance. +1.
Save the converted PDF/A-compliant file to the output path. + +```python + + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + document = ap.Document(path_infile) + document.convert( + self.data_dir + "pdf_pdfa.log", + ap.PdfFormat.PDF_A_1B, + ap.ConvertErrorAction.DELETE, + ) + document.save(path_outfile) + + print(infile + " converted into " + outfile) +``` + +### Convert PDF to PDF 2.0 and PDF/A-4 + +This example demonstrates how to convert a PDF document into newer standardized formats: PDF 2.0 and PDF/A-4. +Both conversions help ensure compliance with modern specifications and archival requirements. + +1. Load the input document using ap.Document. +1. Perform the first conversion to PDF 2.0 by calling document.convert with: + - Log file path for conversion details. + - Target format - 'ap.PdfFormat.V_2_0'. + - Error action - 'ap.ConvertErrorAction.DELETE' to remove non-compliant elements. +1. Perform a second conversion to PDF/A-4 using the same method, ensuring the file is also compliant with archival standards. +1. Save the resulting document in the specified output path. + +```python + + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + path_logfile = path_outfile.replace(".pdf","_log.xml") + + document = ap.Document(path_infile) + document.convert(path_logfile, ap.PdfFormat.V_2_0, ap.ConvertErrorAction.DELETE) + + document.convert(path_logfile, ap.PdfFormat.PDF_A_4, ap.ConvertErrorAction.DELETE) + document.save(path_outfile) + + print(infile + " converted into " + outfile) +``` + +### Convert PDF to PDF/A-3A with Embedded Files + +Next code snippet demonstrates how to embed external files into a PDF and then convert the PDF into PDF/A-3A format, which supports attachments and is suitable for long-term archival with embedded content. + +1. 
Load the input PDF using 'ap.Document'. +1. Create a 'FileSpecification' object pointing to the file to embed (e.g., "aspose-logo.jpg") with a description. +1. Add the file specification to the PDF’s 'embedded_files' collection. +1. Convert the document to PDF/A-3A using 'document.convert', specifying: + - Log file path. + - Target format - 'ap.PdfFormat.PDF_A_3A'. + - Error action - 'ap.ConvertErrorAction.DELETE' to remove non-compliant elements. +1. Save the converted PDF to the output path. +1. Print a confirmation message. + +```python + + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + path_logfile = path_outfile.replace(".pdf","_log.xml") + + document = ap.Document(path_infile) + + fileSpecification = ap.FileSpecification(self.data_dir + "aspose-logo.jpg", "Large Image file") + document.embedded_files.add(fileSpecification) + document.convert(path_logfile, ap.PdfFormat.PDF_A_3A, ap.ConvertErrorAction.DELETE) + document.save(path_outfile) + print(infile + " converted into " + outfile) +``` + +### Convert PDF to PDF/A-1B with Font Substitution + +This function converts a PDF into PDF/A-1B format while handling missing fonts by substituting them with available ones. This ensures the converted PDF remains visually consistent and compliant with archival standards. + +1. Load the PDF using 'ap.Document'. +1. Convert the PDF to PDF/A-1B using 'document.convert', specifying: + - Log file path. + - Target format - 'ap.PdfFormat.PDF_A_1B'. + - Error action - 'ap.ConvertErrorAction.DELETE' to remove non-compliant elements. +1. Save the converted PDF to the output path. +1. Print a confirmation message.
+ +```python + + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + path_logfile = path_outfile.replace(".pdf","_log.xml") + + try: + ap.text.FontRepository.find_font("AgencyFB") + + except ap.FontNotFoundException: + font_substitution = ap.text.SimpleFontSubstitution("AgencyFB", "Arial") + ap.text.FontRepository.Substitutions.append(font_substitution) + + document = ap.Document(path_infile) + document.convert(path_logfile, ap.PdfFormat.PDF_A_1B, ap.ConvertErrorAction.DELETE) + document.save(path_outfile) + print(infile + " converted into " + outfile) +``` + +### Convert PDF to PDF/A-1B with Automatic Tagging + +This function converts a PDF document into PDF/A-1B format while automatically tagging the content for accessibility and structural consistency. Automatic tagging improves document usability for screen readers and ensures proper semantic structure. + +1. Load the PDF using 'ap.Document'. +1. Create 'PdfFormatConversionOptions' specifying: + - Log file path. + - Target format - 'ap.PdfFormat.PDF_A_1B'. + - Error action - 'ap.ConvertErrorAction.DELETE' to remove non-compliant elements. +1. Configure 'AutoTaggingSettings': + - Enable 'enable_auto_tagging = True'. + - Set 'heading_recognition_strategy = AUTO' to automatically detect headings. +1. Assign the auto-tagging settings to the conversion options. +1. Convert the PDF using 'document.convert(options)'. +1. Save the converted PDF to the output path. +1. Print a confirmation message. 
+ +```python + + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + path_logfile = path_outfile.replace(".pdf","_log.xml") + + document = ap.Document(path_infile) + options = ap.PdfFormatConversionOptions(path_logfile, ap.PdfFormat.PDF_A_1B, ap.ConvertErrorAction.DELETE) + + auto_tagging_settings = ap.AutoTaggingSettings() + auto_tagging_settings.enable_auto_tagging = True + + auto_tagging_settings.heading_recognition_strategy = ap.HeadingRecognitionStrategy.AUTO + + options.auto_tagging_settings = auto_tagging_settings + document.convert(options) + document.save(path_outfile) + print(infile + " converted into " + outfile) +``` + +## Convert PDF to PDF/E + +This snippet validates whether a PDF document conforms to the PDF/E-1 standard, which is an ISO standard tailored for engineering and technical documents. The validation results are saved to a log file. + +1. Load the PDF document using 'ap.Document'. +1. Call the validate method, specifying: + - Log file path to store validation results. + - Target format - 'ap.PdfFormat.PDF_E_1'. +1. The validation results are saved in the log file for review. + +```python + + path_infile = path.join(self.data_dir, infile) + path_logfile = path.join(self.data_dir, "python", outfile) + + document = ap.Document(path_infile) + document.validate(path_logfile, ap.PdfFormat.PDF_E_1) +``` + +Next example demonstrates how to convert a PDF into PDF/E-1 format, which is an ISO standard tailored for engineering and technical documentation. This format preserves precise layout, graphics, and metadata required for engineering workflows. + +1. Load the source PDF using 'ap.Document'. +1. Create 'PdfFormatConversionOptions' specifying: + - Log file path for tracking conversion issues. + - Target format - 'ap.PdfFormat.PDF_E_1'. + - Error action - 'ap.ConvertErrorAction.DELETE' to remove non-compliant elements. +1. 
Convert the PDF using 'document.convert(options)'. +1. Save the converted PDF to the specified output path. +1. Print a confirmation message. + +```python + + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + path_logfile = path_outfile.replace(".pdf","_log.xml") + + document = ap.Document(path_infile) + options = ap.PdfFormatConversionOptions(path_logfile, ap.PdfFormat.PDF_E_1, ap.ConvertErrorAction.DELETE) + + document.convert(options) + + # Save PDF document + document.save(path_outfile) + print(infile + " converted into " + outfile) +``` + +## Convert PDF to PDF/X + +Next code snippet converts a PDF document into PDF/X-4 format, which is an ISO standard commonly used in the printing and publishing industry. PDF/X-4 ensures color accuracy, maintains transparency, and embeds ICC profiles for consistent output across devices. + +1. Load the source PDF using 'ap.Document'. +1. Create 'PdfFormatConversionOptions' specifying: + - Log file path. + - Target format - 'ap.PdfFormat.PDF_X_4'. + - Error action - 'ap.ConvertErrorAction.DELETE' to remove non-compliant elements. +1. Provide the **ICC profile file** for color management via 'icc_profile_file_name'. +1. Specify an **OutputIntent** with a condition identifier (e.g., "FOGRA39") for printing requirements. +1. Convert the PDF using 'document.convert()'. +1. Save the converted PDF to the specified output path. +1. Print a confirmation message. 
+ +```python + + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + path_logfile = path_outfile.replace(".pdf","_log.xml") + + document = ap.Document(path_infile) + options = ap.PdfFormatConversionOptions(path_logfile, ap.PdfFormat.PDF_X_4, ap.ConvertErrorAction.DELETE) + + # Provide the name of the external ICC profile file (optional) + options.icc_profile_file_name = path.join(self.data_dir,"ISOcoated_v2_eci.icc") + # Provide an output condition identifier and other necessary OutputIntent properties (optional) + options.output_intent = ap.OutputIntent("FOGRA39") + + document.convert(options) + + # Save PDF document + document.save(path_outfile) + print(infile + " converted into " + outfile) +``` \ No newline at end of file diff --git a/en/python-net/converting/convert-pdf-to-pdfa/pdf_to_pdfa.png b/en/python-net/converting/convert-pdf-to-pdfx/pdf_to_pdfa.png similarity index 100% rename from en/python-net/converting/convert-pdf-to-pdfa/pdf_to_pdfa.png rename to en/python-net/converting/convert-pdf-to-pdfx/pdf_to_pdfa.png diff --git a/en/python-net/converting/convert-pdfx-to-pdf/_index.md b/en/python-net/converting/convert-pdfx-to-pdf/_index.md new file mode 100644 index 000000000..612c6bfc7 --- /dev/null +++ b/en/python-net/converting/convert-pdfx-to-pdf/_index.md @@ -0,0 +1,70 @@ +--- +title: Convert PDF/x to PDF formats in Python +linktitle: Convert PDF/x to PDF formats +type: docs +weight: 120 +url: /python-net/convert-pdf_x-to-pdf/ +lastmod: "2025-09-27" +description: This topic shows you how to convert PDF/x to PDF formats using Aspose.PDF for Python via .NET. +sitemap: + changefreq: "monthly" + priority: 0.8 +TechArticle: true +AlternativeHeadline: How to convert PDF/x to PDF formats +Abstract: The article provides a comprehensive guide on converting PDF/UA, and PDF/A to PDF file using Aspose.PDF for Python. 
+--- + +**PDF/x to PDF conversion means the ability to convert PDF/UA and PDF/A files to regular PDF files.** + +## Convert PDF/A to PDF + +1. Load the PDF document using 'ap.Document'. +1. Call 'remove_pdfa_compliance()' to strip all PDF/A-related compliance settings and metadata. +1. Save the resulting PDF to the output path. + +```python + + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + document = ap.Document(path_infile) + document.remove_pdfa_compliance() + document.save(path_outfile) + + print(infile + " converted into " + outfile) +``` + +## Convert PDF/UA to PDF and PDF/A-1B + +This function demonstrates a two-step conversion process: first removing PDF/UA (Universal Accessibility) compliance, and then converting the resulting PDF into PDF/A-1B format with automatic tagging for accessibility and semantic structure. + +1. Remove PDF/UA Compliance: + - Load the PDF document using 'ap.Document'. + - Call 'remove_pdf_ua_compliance()' to strip all PDF/UA-related compliance settings and metadata. + - Save the resulting PDF to the output path. +1. Convert to PDF/A-1B with Automatic Tagging: + - Define the log file path (_log.xml) to store conversion details. + - Load the PDF document again. + - Create 'PdfFormatConversionOptions' specifying the log file path, the target format ('ap.PdfFormat.PDF_A_1B') and the error action. + - Configure 'AutoTaggingSettings'. + - Assign auto-tagging settings to the conversion options. + - Convert the document using 'document.convert()'. + - Save the PDF/A-1B compliant output.
+ +```python + + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + document = ap.Document(path_infile) + document.remove_pdf_ua_compliance() + document.save(path_outfile) + + print(infile + " converted into " + outfile) +``` \ No newline at end of file From d9794360050bd8c2226c1338323b4062460bc879 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Thu, 18 Sep 2025 11:19:18 +0300 Subject: [PATCH 31/65] Add documentation for replacing and extracting images in PDF using Python --- .../working-with-images/_index.md | 4 +- .../add-image-to-existing-pdf-file/_index.md | 153 ++++++++++--- .../delete-images-from-pdf-file/_index.md | 33 +-- .../extract-images-from-pdf-file/_index.md | 76 +++++-- .../_index.md | 83 +++++++ .../_index.md | 211 ++++++++++++++++++ 6 files changed, 493 insertions(+), 67 deletions(-) create mode 100644 en/python-net/advanced-operations/working-with-images/replace-image-in-existing-pdf-file/_index.md create mode 100644 en/python-net/advanced-operations/working-with-images/search-and-get-images-from-pdf-document/_index.md diff --git a/en/python-net/advanced-operations/working-with-images/_index.md b/en/python-net/advanced-operations/working-with-images/_index.md index 168c98737..a1170df20 100644 --- a/en/python-net/advanced-operations/working-with-images/_index.md +++ b/en/python-net/advanced-operations/working-with-images/_index.md @@ -5,7 +5,7 @@ type: docs weight: 40 url: /python-net/working-with-images/ description: This section describes the features of working with images in a PDF file using Python library.
-lastmod: "2025-02-27" +lastmod: "2025-09-27" sitemap: changefreq: "monthly" priority: 0.7 @@ -23,4 +23,6 @@ You are able to do the following: - [Add Image to Existing PDF File](/pdf/python-net/add-image-to-existing-pdf-file/) - add images and references of a single image in PDF document, after that control quality. - [Delete Images from PDF File](/pdf/python-net/delete-images-from-pdf-file/) - check code snippet for deleting images from PDF file. - [Extract Images from PDF File](/pdf/python-net/extract-images-from-pdf-file/) - the next article shows how to extract images from PDF file using Python library. +- [Search and Get Images from PDF Document](/pdf/python-net/search-and-get-images-from-pdf-document/) - you can get an image from an individual page and search among images on all pages with Python. +- [Replace Image in Existing PDF File](/pdf/python-net/replace-image-in-existing-pdf-file/) - check our code snippet, it shows you how to replace an image in a PDF file. diff --git a/en/python-net/advanced-operations/working-with-images/add-image-to-existing-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-images/add-image-to-existing-pdf-file/_index.md index 9e1a1e4f4..e420539ed 100644 --- a/en/python-net/advanced-operations/working-with-images/add-image-to-existing-pdf-file/_index.md +++ b/en/python-net/advanced-operations/working-with-images/add-image-to-existing-pdf-file/_index.md @@ -5,57 +5,158 @@ type: docs weight: 10 url: /python-net/add-image-to-existing-pdf-file/ description: This section describes how to add image to existing PDF file using Python library. -lastmod: "2025-02-27" +lastmod: "2025-09-27" TechArticle: true -AlternativeHeadline: How to add images into PDf using Python +AlternativeHeadline: How to add images into PDF using Python Abstract: This article provides guidance on adding images to existing PDF files using Python with the Aspose.PDF library. Two methods are outlined for achieving this. 
The first method involves using the `Document` class from Aspose.PDF, where the user loads the PDF, specifies the page number, and uses the `add_image` method of the `Page` class to position the image. The document is then saved using the `save()` method. The second method utilizes the `PdfFileMend` class from the Aspose.PDF.Facades namespace, which offers a simpler interface. Here, the `add_image()` method is invoked to add the image to the specified page and coordinates, followed by saving the updated PDF and closing the `PdfFileMend` object. Code snippets are provided for both methods to demonstrate the process. --- ## Add Image in an Existing PDF File -The following code snippet shows how to add image in the PDF file. - -1. Load the input PDF file. -1. Specify the page number on which the picture will be placed. -1. To define the position of the image on the page call the [Page](https://reference.aspose.com/pdf/python-net/aspose.pdf/page/) class [add_image](https://reference.aspose.com/pdf/python-net/aspose.pdf/page/#methods) method. -1. Call the [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) class [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method. +This example demonstrates how to insert an image into a specific position on a PDF page using Aspose.PDF for Python via .NET. +1. Load the PDF document with 'ap.Document'. +1. Select the target page ('document.pages[1]' - the first page). +1. Use 'page.add_image()' to place the image: + - File path of the image. + - A 'Rectangle' object defining the image’s coordinates (left=20, bottom=730, right=120, top=830). +1. Save the updated PDF.
```python import aspose.pdf as ap + from io import FileIO + from os import path + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + document = ap.Document(path_infile) + page = document.pages[1] + page.add_image( + path.join(self.data_dir, image_file), + ap.Rectangle(20, 730, 120, 830, True), + ) + document.save(path_outfile) +``` - # Open document - document = ap.Document(input_file) - - document.pages[1].add_image(image_file, ap.Rectangle(20, 730, 120, 830, True)) +## Add an Image Using Operators - document.save(output_pdf) -``` +Next code snippet shows a low-level approach to adding an image to a PDF page by manually working with PDF operators rather than high-level helper methods. -## Add Image in an Existing PDF File (Facades) +Steps: -There is also an alternative, easier way to add a Image to a PDF file. You can use [AddImage](https://reference.aspose.com/pdf/python-net/aspose.pdf.facades/pdffilemend/methods/addimage/index) method of the [PdfFileMend](https://reference.aspose.com/pdf/python-net/aspose.pdf.facades/pdffilemend/) class. The [add_image()](https://reference.aspose.com/pdf/python-net/aspose.pdf.facades/pdffilemend/#methods) method requires the image to be added, the page number at which the image needs to be added and the coordinate information. After that, save the updated PDF file, and close the PdfFileMend object using [close()](https://reference.aspose.com/pdf/python-net/aspose.pdf.facades/pdffilemend/#methods) method. The following code snippet shows you how to add image in an existing PDF file. +1. Create a new blank 'Document'. +1. Add a page and set its size (842 × 595 - landscape A4). +1. Access the page’s image resources (page.resources.images). +1. Load the image file into a stream and add it to the resources. + - The method returns an 'image_id'. + - The newly added image object is retrieved from the resources. +1. 
Define a rectangle that maintains the aspect ratio of the image. +1. Build an operator sequence: + - 'GSave()' - Save the current graphics state. + - 'ConcatenateMatrix(matrix)' - Apply transformation (scale and center the image vertically on the page). + - 'Do(image_id)' - Render the image. + - 'GRestore()' - Restore graphics state. +1. Add the operator sequence to 'page.contents'. +1. Save the resulting PDF. ```python import aspose.pdf as ap + from io import FileIO + from os import path + + path_infile = path.join(self.data_dir, image_file) + path_outfile = path.join(self.data_dir, "python", outfile) + + document = ap.Document() + page = document.pages.add() + page.set_page_size(842,595) + + # Get page resources + resources_images = page.resources.images + + # Add image to resources + image_stream = FileIO(path_infile, "rb") + image_id = resources_images.add(image_stream) - # Open document - mender = ap.facades.PdfFileMend() + x_image = list(resources_images)[-1] - # Create PdfFileMend object to add text - mender.bind_pdf(input_file) + rectangle = ap.Rectangle( + 0, + 0, + page.media_box.width, + (page.media_box.width * x_image.height) / x_image.width, + True, + ) - # Add image in the PDF file - mender.add_image(image_file, 1, 100.0, 600.0, 200.0, 700.0) + # Create operator sequence for adding image + operators = [] - # Save changes - mender.save(output_pdf) + # Save graphics state + operators.append(ap.operators.GSave()) - # Close PdfFileMend object - mender.close() + # Set transformation matrix (position and size) + matrix = ap.Matrix( + rectangle.urx - rectangle.llx, + 0, + 0, + rectangle.ury - rectangle.lly, + rectangle.llx, + rectangle.lly + (page.media_box.height - rectangle.height) / 2, + ) + operators.append(ap.operators.ConcatenateMatrix(matrix)) + # Draw the image + operators.append(ap.operators.Do(image_id)) + + # Restore graphics state + operators.append(ap.operators.GRestore()) + + # Add operators to page contents +
page.contents.add(operators) + + document.save(path_outfile) ``` +## Add Image with Alternative Text + +This example demonstrates how to add an image to a PDF page and assign alternative text (alt text) for accessibility compliance (such as PDF/UA). +1. Create a new 'Document' and add a page (842 × 595, landscape A4). +1. Place the image on the page using 'page.add_image()' with a rectangle that spans the full page. +1. Access the page’s image resources ('page.resources.images'). +1. Define an alternative text string (e.g., 'Alternative text for image'). +1. Retrieve the first image object from resources ('x_image = resources_images[1]'). +1. Use 'try_set_alternative_text(alt_text, page)' to assign alt text to the image. +1. Save the resulting PDF. + +```python + + import aspose.pdf as ap + from io import FileIO + from os import path + + path_image_file = path.join(self.data_dir, image_file) + path_outfile = path.join(self.data_dir, "python", outfile) + + document = ap.Document() + page = document.pages.add() + page.set_page_size(842,595) + + page.add_image( + path_image_file, + ap.Rectangle(0, 0, 842, 595, True), + ) + + resources_images = page.resources.images + alt_text = "Alternative text for image" + x_image = resources_images[1] + result = x_image.try_set_alternative_text(alt_text, page) + + # Report whether the alternative text was assigned to the image + if result: + print("Text has been added successfully") + document.save(path_outfile) +``` \ No newline at end of file diff --git a/en/python-net/advanced-operations/working-with-images/delete-images-from-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-images/delete-images-from-pdf-file/_index.md index a75b3cb98..c7115f82d 100644 --- a/en/python-net/advanced-operations/working-with-images/delete-images-from-pdf-file/_index.md +++ b/en/python-net/advanced-operations/working-with-images/delete-images-from-pdf-file/_index.md @@ -5,7 +5,7 @@ type: docs weight: 20 url:
/python-net/delete-images-from-pdf-file/ description: This section explain how to delete Images from PDF File using Aspose.PDF for Python via .NET. -lastmod: "2025-02-27" +lastmod: "2025-09-27" TechArticle: true AlternativeHeadline: How to remove images from PDF using Python Abstract: The article discusses the various reasons for removing images from PDF files, such as protecting privacy, preventing unauthorized access to sensitive information, reducing file size for easier sharing and storage, and preparing the document for compression or text extraction. It introduces **Aspose.PDF for Python via .NET** as a tool to accomplish this task. The article provides step-by-step instructions and code snippets for deleting specific images or all images from a PDF file using Aspose.PDF. The process involves opening an existing PDF document, deleting images either individually or in bulk, and saving the updated file. The provided Python code demonstrates how to remove images by accessing the document's resources and modifying the desired pages. @@ -35,33 +35,14 @@ The following code snippet shows how to delete an image from a PDF file.
```python import aspose.pdf as ap + from os import path - # Open document - document = ap.Document(input_file) + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, outfile) - # Delete particular image - document.pages[2].resources.images.delete(1) - - # Save updated PDF file - document.save(output_pdf) -``` - -## Delete all images from input PDF - -```python - - import aspose.pdf as ap - - # Open document - document = ap.Document(input_file) - - # Delete all images on all pages - for i in range(len(document.pages)): - while len(document.pages[i + 1].resources.images) != 0: - document.pages[i + 1].resources.images.delete(1) - - # Save updated PDF file - document.save(output_file) + document = ap.Document(path_infile) + document.pages[1].resources.images.delete(1) + document.save(path_outfile) ``` diff --git a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md index 5855343ba..efbbbac62 100644 --- a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md +++ b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md @@ -5,31 +5,79 @@ type: docs weight: 30 url: /python-net/extract-images-from-pdf-file/ description: This section shows how to extract images from PDF file using Python library. -lastmod: "2025-02-27" +lastmod: "2025-09-27" TechArticle: true -AlternativeHeadline: Get images from PDF with Python +AlternativeHeadline: Extract images from PDF with Python Abstract: This article discusses the process of extracting images from PDF files using Aspose.PDF for Python. It highlights the utility of separating images for purposes such as management, archiving, analysis, or sharing. The article explains that images within a PDF are stored in each page's resources collection, specifically within the XImage collection. 
To extract an image, users can access a particular page and retrieve the image using its index from the Images collection. The XImage object returned by the index provides a `save()` method to save the extracted image. A code snippet is provided to demonstrate the steps required to open a PDF document, extract a specific image from the second page using its index, and save it to a file. --- Do you need to separate images from your PDF files? For simplified management, archiving, analysis, or sharing images of your documents, use **Aspose.PDF for Python** and extract images from PDF files. -Images are held in each page's [resources](https://reference.aspose.com/pdf/python-net/aspose.pdf/page/#properties) collection's [XImage](https://reference.aspose.com/pdf/python-net/aspose.pdf/ximagecollection/) collection. To extract a particular page, then get the image from the Images collection using the particular index of the image. - -The image's index returns an [XImage](https://reference.aspose.com/pdf/python-net/aspose.pdf/ximage/) object. This object provides a [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method which can be used to save the extracted image. The following code snippet shows how to extract images from a PDF file. +1. Load the PDF document with 'ap.Document()'. +1. Access the first page of the document (document.pages[1]). +1. Select the first image from the page resources (resources.images[1]). +1. Create an output stream (FileIO) for the target file. +1. Save the extracted image using 'xImage.save(output_image)'. 
```python import aspose.pdf as ap + from io import FileIO + from os import path - # Open document - document = ap.Document(input_file) - - # Extract a particular image - xImage = document.pages[2].resources.images[1] - outputImage = io.FileIO(output_image, "w") + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, outfile) - # Save output image - xImage.save(outputImage) - outputImage.close() + document = ap.Document(path_infile) + xImage = document.pages[1].resources.images[1] + with FileIO(path_outfile, "w") as output_image: + xImage.save(output_image) ``` +## Extract Images from Specific Region in PDF + +This method extracts images located within a specified rectangular region on a PDF page and saves them as separate files. + +1. Load the PDF document using 'ap.Document'. + +1. Create an 'ImagePlacementAbsorber' to collect all images on the first page. + +1. Call 'document.pages[1].accept(absorber)' to analyze image placements. + +1. Iterate through all images in 'absorber.image_placements': + + - Get the image bounding box (llx, lly, urx, ury). + + - Check if both corners of the image rectangle fall inside the target rectangle (rectangle.contains()). + + - If true, save the image to a file using FileIO, replacing 'index' in the filename with a sequential number. + +1. Increment the index for each saved image. 
+ +```python + + import aspose.pdf as ap + from io import FileIO + from os import path + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, outfile) + + rectangle = ap.Rectangle(0, 0, 590, 590, True) + + document = ap.Document(path_infile) + absorber = ap.ImagePlacementAbsorber() + document.pages[1].accept(absorber) + index = 1 + for image_placement in absorber.image_placements: + point1 = ap.Point( + image_placement.rectangle.llx, image_placement.rectangle.lly + ) + point2 = ap.Point( + image_placement.rectangle.urx, image_placement.rectangle.ury + ) + if rectangle.contains(point1, True) and rectangle.contains(point2, True): + with FileIO(path_outfile.replace("index", str(index)), "w") as output_image: + image_placement.image.save(output_image) + index = index + 1 +``` \ No newline at end of file diff --git a/en/python-net/advanced-operations/working-with-images/replace-image-in-existing-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-images/replace-image-in-existing-pdf-file/_index.md new file mode 100644 index 000000000..7671d179d --- /dev/null +++ b/en/python-net/advanced-operations/working-with-images/replace-image-in-existing-pdf-file/_index.md @@ -0,0 +1,83 @@ +--- +title: Replace Image in Existing PDF File using Python +linktitle: Replace Image +type: docs +weight: 70 +url: /python-net/replace-image-in-existing-pdf-file/ +description: This section describes how to replace an image in an existing PDF file using the Python library. +lastmod: "2025-09-17" +TechArticle: true +AlternativeHeadline: Replace an Image in PDF +Abstract: The Aspose.PDF for Python via .NET documentation provides a comprehensive guide on replacing images within existing PDF files. This functionality is essential for tasks such as updating logos, graphics, or other visual elements in a PDF document without altering its textual content.
+--- + +## Replace an Image in PDF + +This example demonstrates how to replace an existing image on a PDF page with a new image using Aspose.PDF for Python via .NET. + +1. Import necessary modules (aspose.pdf, os.path, FileIO). +1. Define paths for: + - Input PDF (infile) + - New image file (image_file) + - Output PDF (outfile) +1. Load the PDF document using 'apdf.Document(path_infile)'. +1. Open the new image file in binary read mode. +1. Replace the first image on the first page: + - 'document.pages[1].resources.images.replace(1, image_stream)' +1. Save the updated PDF to 'path_outfile'. + +```python + + import aspose.pdf as apdf + from io import FileIO + from os import path + + path_infile = path.join(self.data_dir, infile) + path_image_file = path.join(self.data_dir, image_file) + path_outfile = path.join(self.data_dir, outfile) + + document = apdf.Document(path_infile) + + with FileIO(path_image_file, "rb") as image_stream: + document.pages[1].resources.images.replace(1, image_stream) + + document.save(path_outfile) +``` + +## Replace specific Image + +This example demonstrates how to replace a specific image on a PDF page by locating it via image placement detection. + +1. Load the PDF using 'apdf.Document()'. +1. Create an 'ImagePlacementAbsorber' to collect all image placements on the page. +1. Accept the absorber on the first page ('document.pages[1].accept(absorber)'). +1. Check if any image placements exist on the page. +1. Select the first image placement (absorber.image_placements[1]) and replace it. +1. Save the modified PDF to 'path_outfile'. 
+ +```python + + import aspose.pdf as apdf + from io import FileIO + from os import path + + path_infile = path.join(self.data_dir, infile) + path_image_file = path.join(self.data_dir, image_file) + path_outfile = path.join(self.data_dir, outfile) + + document = apdf.Document(path_infile) + + # Create ImagePlacementAbsorber to find image placements + absorber = apdf.ImagePlacementAbsorber() + + # Accept the absorber for the first page + document.pages[1].accept(absorber) + + # Replace the first image placement found + if len(absorber.image_placements) > 0: + image_placement = absorber.image_placements[1] + with FileIO(path_image_file, "rb") as image_stream: + image_placement.replace(image_stream) + + document.save(path_outfile) +``` \ No newline at end of file diff --git a/en/python-net/advanced-operations/working-with-images/search-and-get-images-from-pdf-document/_index.md b/en/python-net/advanced-operations/working-with-images/search-and-get-images-from-pdf-document/_index.md new file mode 100644 index 000000000..417391924 --- /dev/null +++ b/en/python-net/advanced-operations/working-with-images/search-and-get-images-from-pdf-document/_index.md @@ -0,0 +1,211 @@ +--- +title: Get and Search Images in PDF +linktitle: Get and Search Images +type: docs +weight: 40 +url: /python-net/search-and-get-images-from-pdf-document/ +description: Learn how to search and get images from the PDF document in Python using Aspose.PDF. +lastmod: "2025-09-17" +TechArticle: true +AlternativeHeadline: Searching and Extracting Images from PDF +Abstract: The Aspose.PDF for Python via .NET library offers robust capabilities for searching and extracting images from PDF documents. Utilizing the 'ImagePlacementAbsorber' class, developers can efficiently locate and access images embedded across all pages of a PDF. 
+--- + +## Inspect Image Placement Properties in a PDF Page + +This example demonstrates how to analyze and display properties of all images on a specific PDF page using Aspose.PDF for Python via .NET. + +1. Create an 'ImagePlacementAbsorber' to collect all images on the page. +1. Call 'document.pages[1].accept(absorber)' to analyze image placements on the first page. +1. Iterate through 'absorber.image_placements' and display key properties of each image: + - Width and Height (points). + - Lower-left X (LLX) and Lower-left Y (LLY) coordinates. + - Horizontal (X) and Vertical (Y) resolution (DPI). + +```python + + import math + import aspose.pdf as ap + from aspose.pycore import cast, is_assignable + from os import path + + path_infile = path.join(self.data_dir, infile) + + document = ap.Document(path_infile) + absorber = ap.ImagePlacementAbsorber() + document.pages[1].accept(absorber) + + for image_placement in absorber.image_placements: + # Display image placement properties for all placements + print("image width: " + str(image_placement.rectangle.width)) + print("image height: " + str(image_placement.rectangle.height)) + print("image LLX: " + str(image_placement.rectangle.llx)) + print("image LLY: " + str(image_placement.rectangle.lly)) + print("image horizontal resolution: " + str(image_placement.resolution.x)) + print("image vertical resolution: " + str(image_placement.resolution.y)) +``` + +## Extract and Count Image Types in a PDF + +This function analyzes all images on the first page of a PDF and counts how many are grayscale and RGB images. + +1. Create an 'ImagePlacementAbsorber' to collect all images on the page. +1. Initialize counters for grayscale and RGB images. +1. Call 'document.pages[1].accept(absorber)' to analyze image placements. +1. Print the total number of images found. +1. Iterate through each image in 'absorber.image_placements': + - Get the image color type using 'image_placement.image.get_color_type()'. 
+ - Increment the corresponding counter (grayscaled or rgb). + - Print a message for each image indicating whether it is grayscale or RGB. + +```python + + import math + import aspose.pdf as ap + from aspose.pycore import cast, is_assignable + from os import path + + path_infile = path.join(self.data_dir, infile) + + document = ap.Document(path_infile) + absorber = ap.ImagePlacementAbsorber() + + # Counters for grayscale and RGB images + grayscaled = 0 + rgb = 0 + + document.pages[1].accept(absorber) + + print("--------------------------------") + print("Total Images = " + str(len(absorber.image_placements))) + + image_counter = 1 + + for image_placement in absorber.image_placements: + # Determine the color type of the image + colorType = image_placement.image.get_color_type() + if colorType == ap.ColorType.GRAYSCALE: + grayscaled += 1 + print(f"Image {image_counter} is Grayscale...") + elif colorType == ap.ColorType.RGB: + rgb += 1 + print(f"Image {image_counter} is RGB...") + image_counter += 1 +``` + +## Extract Detailed Image Information from a PDF + +This function analyzes all images on the first page of a PDF and calculates their scaled dimensions and effective resolution based on the page’s graphics transformations. + +1. Load PDF and initialize variables +1. Collect image resources +1. Process page content operators: + - 'GSave' - push the current CTM onto the stack. + - 'GRestore' - pop the last CTM from the stack. + - 'ConcatenateMatrix' - update the current CTM by multiplying with the operator’s matrix. +1. Print image name, scaled dimensions, and calculated resolution. 
+ +```python + + import math + import aspose.pdf as ap + from aspose.pycore import cast, is_assignable + from os import path + + + path_infile = path.join(self.data_dir, infile) + + document = ap.Document(path_infile) + + default_resolution = 72 + graphics_state = [] + + # Collect image names from page resources + image_names = list(document.pages[1].resources.images.names) + + # Push identity matrix to stack + graphics_state.append(Matrix23(1, 0, 0, 1, 0, 0)) + + # Process operators on first page + for op in document.pages[1].contents: + if is_assignable(op, ap.operators.GSave): + graphics_state.append(graphics_state[-1]) + + elif is_assignable(op, ap.operators.GRestore): + graphics_state.pop() + + elif is_assignable(op, ap.operators.ConcatenateMatrix): + opCM = cast(ap.operators.ConcatenateMatrix, op) + cm = Matrix23( + float(opCM.matrix.a), + float(opCM.matrix.b), + float(opCM.matrix.c), + float(opCM.matrix.d), + float(opCM.matrix.e), + float(opCM.matrix.f), + ) + + graphics_state[-1] = graphics_state[-1] * cm + continue + + elif is_assignable(op, ap.operators.Do): + opDo = cast(ap.operators.Do, op) + if opDo.name in image_names: + last_ctm = graphics_state[-1] + index = image_names.index(opDo.name) + 1 + image = document.pages[1].resources.images[index] + + # Calculate scaled dimensions + scaled_width = math.sqrt(last_ctm.a**2 + last_ctm.b**2) + scaled_height = math.sqrt(last_ctm.c**2 + last_ctm.d**2) + + # Original dimensions + original_width = image.width + original_height = image.height + + # Compute resolution + res_horizontal = original_width * default_resolution / scaled_width + res_vertical = original_height * default_resolution / scaled_height + + # Output info + print( + f"{self.data_dir}image {opDo.name} " + f"({scaled_width:.2f}:{scaled_height:.2f}): " + f"res {res_horizontal:.2f} x {res_vertical:.2f}" + ) +``` + +## Extract Alternative Text from Images in a PDF + +This function retrieves alternative text (alt text) from all images on the first page 
of a PDF, useful for accessibility and PDF/UA compliance checks. + +1. Load the PDF document using 'ap.Document()'. +1. Create an 'ImagePlacementAbsorber' to collect all images on the page. +1. Accept the absorber on the first page (page.accept(absorber)). +1. Iterate through each image in 'absorber.image_placements': + - Print the name of the image in the page’s resource collection (get_name_in_collection()). + - Retrieve the alternative text using 'get_alternative_text(page)'. + - Print the first line of the alt text. + +```python + + import math + import aspose.pdf as ap + from aspose.pycore import cast, is_assignable + from os import path + + path_infile = path.join(self.data_dir, infile) + + document = ap.Document(path_infile) + absorber = ap.ImagePlacementAbsorber() + page = document.pages[1] + page.accept(absorber) + + for image_placement in absorber.image_placements: + print( + "Name in collection: " + + str(image_placement.image.get_name_in_collection()) + ) + lines = image_placement.image.get_alternative_text(page) + print("Alt Text: " + lines[0]) +``` \ No newline at end of file From 45030dd9668513d3f10f1c5245233aeb2309f8b8 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Thu, 18 Sep 2025 11:21:17 +0300 Subject: [PATCH 32/65] minoe fix --- .../delete-images-from-pdf-file/_index.md | 6 +----- .../extract-images-from-pdf-file/_index.md | 7 ------- .../search-and-get-images-from-pdf-document/_index.md | 2 +- 3 files changed, 2 insertions(+), 13 deletions(-) diff --git a/en/python-net/advanced-operations/working-with-images/delete-images-from-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-images/delete-images-from-pdf-file/_index.md index c7115f82d..af01f8fd9 100644 --- a/en/python-net/advanced-operations/working-with-images/delete-images-from-pdf-file/_index.md +++ b/en/python-net/advanced-operations/working-with-images/delete-images-from-pdf-file/_index.md @@ -12,11 +12,9 @@ Abstract: The article 
discusses the various reasons for removing images from PDF --- There are many reasons for removing all or specific images from PDFs. - Sometimes a PDF file may contain important images that need to be removed to protect privacy or prevent unauthorized access to certain information. Removing unwanted or redundant images can help reduce file size, making it easier to share or store PDFs. - If necessary, you can reduce the number of pages by removing all images from the document. Also, deleting images from the document will help prepare the PDF for compression or extraction of text information. @@ -43,6 +41,4 @@ The following code snippet shows how to delete an image from a PDF file. document = ap.Document(path_infile) document.pages[1].resources.images.delete(1) document.save(path_outfile) -``` - - +``` \ No newline at end of file diff --git a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md index efbbbac62..e3efb498d 100644 --- a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md +++ b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md @@ -39,19 +39,12 @@ Do you need to separate images from your PDF files? For simplified management, a This method extracts images located within a specified rectangular region on a PDF page and saves them as separate files. 1. Load the PDF document using 'ap.Document'. - 1. Create an 'ImagePlacementAbsorber' to collect all images on the first page. - 1. Call 'document.pages[1].accept(absorber)' to analyze image placements. - 1. Iterate through all images in 'absorber.image_placements': - - Get the image bounding box (llx, lly, urx, ury). - - Check if both corners of the image rectangle fall inside the target rectangle (rectangle.contains()). 
- - If true, save the image to a file using FileIO, replacing 'index' in the filename with a sequential number. - 1. Increment the index for each saved image. ```python diff --git a/en/python-net/advanced-operations/working-with-images/search-and-get-images-from-pdf-document/_index.md b/en/python-net/advanced-operations/working-with-images/search-and-get-images-from-pdf-document/_index.md index 417391924..ac31fbfdd 100644 --- a/en/python-net/advanced-operations/working-with-images/search-and-get-images-from-pdf-document/_index.md +++ b/en/python-net/advanced-operations/working-with-images/search-and-get-images-from-pdf-document/_index.md @@ -91,7 +91,7 @@ This function analyzes all images on the first page of a PDF and counts how many rgb += 1 print(f"Image {image_counter} is RGB...") image_counter += 1 -``` +``` ## Extract Detailed Image Information from a PDF From 920f5a5918afed624032d4d18d3e6c8c77301497 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Thu, 18 Sep 2025 11:24:29 +0300 Subject: [PATCH 33/65] Update en/python-net/converting/convert-pdfx-to-pdf/_index.md Co-authored-by: Andriy Andruhovski --- en/python-net/converting/convert-pdfx-to-pdf/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-pdfx-to-pdf/_index.md b/en/python-net/converting/convert-pdfx-to-pdf/_index.md index 612c6bfc7..1133c0adb 100644 --- a/en/python-net/converting/convert-pdfx-to-pdf/_index.md +++ b/en/python-net/converting/convert-pdfx-to-pdf/_index.md @@ -37,7 +37,7 @@ Abstract: The article provides a comprehensive guide on converting PDF/UA, and P print(infile + " converted into " + outfile) ``` -## Convert PDF/UA to PDF and PDF/A-1B +## Removing PDF/UA compliance This function demonstrates a two-step conversion process: first removing PDF/UA (Universal Accessibility) compliance, and then converting the resulting PDF into PDF/A-1B format with automatic tagging for accessibility and 
semantic structure. From fd5ac18ec7726996bd4072bee6fab5a5fcd96117 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Thu, 18 Sep 2025 11:24:36 +0300 Subject: [PATCH 34/65] Update en/python-net/converting/convert-pdf-to-images-format/_index.md Co-authored-by: Andriy Andruhovski --- en/python-net/converting/convert-pdf-to-images-format/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-pdf-to-images-format/_index.md b/en/python-net/converting/convert-pdf-to-images-format/_index.md index 60088d1ba..7425dfb53 100644 --- a/en/python-net/converting/convert-pdf-to-images-format/_index.md +++ b/en/python-net/converting/convert-pdf-to-images-format/_index.md @@ -251,7 +251,7 @@ The following steps and code snippet in Python shows this possibility: ```python from os import path - import aspose.pdf as apdf + import aspose.pdf as ap from io import FileIO From 0f666de3e1e2a6537af46cf070cad1fbe0661a49 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Thu, 18 Sep 2025 11:24:44 +0300 Subject: [PATCH 35/65] Update en/python-net/converting/convert-pdf-to-images-format/_index.md Co-authored-by: Andriy Andruhovski --- .../converting/convert-pdf-to-images-format/_index.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/en/python-net/converting/convert-pdf-to-images-format/_index.md b/en/python-net/converting/convert-pdf-to-images-format/_index.md index 7425dfb53..daaba8ae8 100644 --- a/en/python-net/converting/convert-pdf-to-images-format/_index.md +++ b/en/python-net/converting/convert-pdf-to-images-format/_index.md @@ -258,10 +258,10 @@ The following steps and code snippet in Python shows this possibility: path_infile = path.join(self.data_dir, infile) path_outfile = path.join(self.data_dir, "python", outfile) - document = apdf.Document(path_infile) - resolution = apdf.devices.Resolution(300) + document = 
ap.Document(path_infile) + resolution = ap.devices.Resolution(300) - rendering_options = apdf.RenderingOptions() + rendering_options = ap.RenderingOptions() rendering_options.default_font_name = "Arial" device = apdf.devices.PngDevice(resolution) From 461c44a8a8b00aa2e210ae490c10e3d17bd30f88 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Thu, 18 Sep 2025 11:24:50 +0300 Subject: [PATCH 36/65] Update en/python-net/converting/convert-pdf-to-images-format/_index.md Co-authored-by: Andriy Andruhovski --- en/python-net/converting/convert-pdf-to-images-format/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-pdf-to-images-format/_index.md b/en/python-net/converting/convert-pdf-to-images-format/_index.md index daaba8ae8..397a57090 100644 --- a/en/python-net/converting/convert-pdf-to-images-format/_index.md +++ b/en/python-net/converting/convert-pdf-to-images-format/_index.md @@ -264,7 +264,7 @@ The following steps and code snippet in Python shows this possibility: rendering_options = ap.RenderingOptions() rendering_options.default_font_name = "Arial" - device = apdf.devices.PngDevice(resolution) + device = ap.devices.PngDevice(resolution) device.rendering_options = rendering_options page_count = 1 From ce0042ae60db4f0bc8fe205cd0039bffbdd9cafa Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Thu, 18 Sep 2025 11:28:59 +0300 Subject: [PATCH 37/65] Update conversion guide for PDF/x to include PDF/A compliance details --- .../converting/convert-pdfx-to-pdf/_index.md | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/en/python-net/converting/convert-pdfx-to-pdf/_index.md b/en/python-net/converting/convert-pdfx-to-pdf/_index.md index 1133c0adb..fba335122 100644 --- a/en/python-net/converting/convert-pdfx-to-pdf/_index.md +++ b/en/python-net/converting/convert-pdfx-to-pdf/_index.md @@ -41,18 +41,9 @@ Abstract: 
The article provides a comprehensive guide on converting PDF/UA, and P This function demonstrates a two-step conversion process: first removing PDF/UA (Universal Accessibility) compliance, and then converting the resulting PDF into PDF/A-1B format with automatic tagging for accessibility and semantic structure. -1. Remove PDF/UA Compliance: - - Load the PDF document using 'ap.Document'. - - Call 'remove_pdf_ua_compliance(') to strip all PDF/UA-related compliance settings and metadata. - - Save the resulting PDF to the output path. -1. Convert to PDF/A-1B with Automatic Tagging: - - Define the log file path (_log.xml) to store conversion details. - - Load the PDF document again. - - Create 'PdfFormatConversionOptions' specifying: - - Configure 'AutoTaggingSettings' - - Assign auto-tagging settings to the conversion options. - - Convert the document using 'document.convert()'. - - Save the PDF/A-1B compliant output. +1. Load the PDF document using 'ap.Document()'. +1. Call 'document.remove_pdfa_compliance()' to remove any PDF/A restrictions or compliance settings. +1. Save the modified PDF to 'path_outfile'. 
```python From c4c271d33a675f3aa264d39245277d6337da9776 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Mon, 22 Sep 2025 10:11:53 +0300 Subject: [PATCH 38/65] Update en/python-net/converting/convert-html-to-pdf/_index.md Co-authored-by: Andriy Andruhovski --- en/python-net/converting/convert-html-to-pdf/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-html-to-pdf/_index.md b/en/python-net/converting/convert-html-to-pdf/_index.md index b18f7dffd..1ed2451aa 100644 --- a/en/python-net/converting/convert-html-to-pdf/_index.md +++ b/en/python-net/converting/convert-html-to-pdf/_index.md @@ -131,7 +131,7 @@ Aspose presents you online free application ["HTML to PDF"](https://products.asp print(infile + " converted into " + outfile) ``` -## Convert HTML to PDF Render Content to same Page +## Render content on single page during HTML to PDF conversion Aspose.PDF for Python via .NET to convert an HTML file into a single-page PDF. 
From 34d2f3e3d2931e82c2d4bbdcffcad3840de098f3 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Mon, 22 Sep 2025 10:12:07 +0300 Subject: [PATCH 39/65] Update en/python-net/converting/convert-pdf-to-html/_index.md Co-authored-by: Andriy Andruhovski --- en/python-net/converting/convert-pdf-to-html/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-pdf-to-html/_index.md b/en/python-net/converting/convert-pdf-to-html/_index.md index 2cc2a4361..01f0850c7 100644 --- a/en/python-net/converting/convert-pdf-to-html/_index.md +++ b/en/python-net/converting/convert-pdf-to-html/_index.md @@ -100,7 +100,7 @@ This function converts a PDF file into multi-page HTML, where each PDF page is e print(infile + " converted into " + outfile) ``` -### Convert PDF to HTML with Stored SVG Images +### Convert PDF to HTML with saving SVG images in specified folder This function converts a PDF into HTML format while storing all images as SVG files in a specified folder, instead of embedding them directly in the HTML. 
From 7a96828fde3dda178a417f8acee31cc00e175283 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Mon, 22 Sep 2025 10:12:22 +0300 Subject: [PATCH 40/65] Update en/python-net/converting/convert-pdf-to-html/_index.md Co-authored-by: Andriy Andruhovski --- en/python-net/converting/convert-pdf-to-html/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-pdf-to-html/_index.md b/en/python-net/converting/convert-pdf-to-html/_index.md index 01f0850c7..ea9ada36a 100644 --- a/en/python-net/converting/convert-pdf-to-html/_index.md +++ b/en/python-net/converting/convert-pdf-to-html/_index.md @@ -124,7 +124,7 @@ This function converts a PDF into HTML format while storing all images as SVG fi print(infile + " converted into " + outfile) ``` -### Convert PDF to HTML with Compressed SVG Images +### Convert PDF to HTML and saving compressed SVG images This snippet converts a PDF into HTML format, storing all images as SVG files in a specified folder and compressing them to reduce file size. 
From b89982287fbadc76526dda529361909beb1544fd Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Mon, 22 Sep 2025 10:12:33 +0300 Subject: [PATCH 41/65] Update en/python-net/converting/convert-pdf-to-html/_index.md Co-authored-by: Andriy Andruhovski --- en/python-net/converting/convert-pdf-to-html/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-pdf-to-html/_index.md b/en/python-net/converting/convert-pdf-to-html/_index.md index ea9ada36a..d906b8a05 100644 --- a/en/python-net/converting/convert-pdf-to-html/_index.md +++ b/en/python-net/converting/convert-pdf-to-html/_index.md @@ -175,7 +175,7 @@ This snippet converts a PDF into HTML format, embedding raster images as PNG pag print(infile + " converted into " + outfile) ``` -### Convert PDF to HTML (Body Content Only, Multi-Page) +### Convert PDF to Body-Only content HTML page This function converts a PDF into HTML format, generating only the 'body' content without extra 'html' or 'head' tags, and splits the output into separate pages. 
From c748105683f8b92fe18950062cde7b430e893f69 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Mon, 22 Sep 2025 10:12:43 +0300 Subject: [PATCH 42/65] Update en/python-net/converting/convert-pdf-to-images-format/_index.md Co-authored-by: Andriy Andruhovski --- en/python-net/converting/convert-pdf-to-images-format/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-pdf-to-images-format/_index.md b/en/python-net/converting/convert-pdf-to-images-format/_index.md index 52468db91..d23758d9c 100644 --- a/en/python-net/converting/convert-pdf-to-images-format/_index.md +++ b/en/python-net/converting/convert-pdf-to-images-format/_index.md @@ -146,7 +146,7 @@ The following steps and code snippet in Python shows this possibility: * [JpegDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/jpegdevice/) (to convert PDF to JPG) * [PngDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/pngdevice/) (to convert PDF to PNG) * [GifDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/gifdevice/) (to convert PDF to GIF) -3. Call the [device.process()](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/imagedevice/#methods) method to perform PDF to Image conversion. +3. Call the [ImageDevice.process()](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/imagedevice/#methods) method to perform PDF to Image conversion. 
### Convert PDF to BMP From 71a0e82813660d806cb6f2bb3c0615fff42bb5ff Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Mon, 22 Sep 2025 10:13:25 +0300 Subject: [PATCH 43/65] Update en/python-net/converting/convert-pdf-to-html/_index.md Co-authored-by: Andriy Andruhovski --- en/python-net/converting/convert-pdf-to-html/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-pdf-to-html/_index.md b/en/python-net/converting/convert-pdf-to-html/_index.md index d906b8a05..58ec2853a 100644 --- a/en/python-net/converting/convert-pdf-to-html/_index.md +++ b/en/python-net/converting/convert-pdf-to-html/_index.md @@ -151,7 +151,7 @@ This snippet converts a PDF into HTML format, storing all images as SVG files in print(infile + " converted into " + outfile) ``` -### Convert PDF to HTML with Embedded Raster Images +### Convert PDF to HTML with control of Embedded Raster Images This snippet converts a PDF into HTML format, embedding raster images as PNG page backgrounds. This approach preserves image quality and page layout within the HTML. 
From d31da15c1416efee308587d6be78893323620902 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Mon, 22 Sep 2025 10:13:37 +0300 Subject: [PATCH 44/65] Update en/python-net/converting/convert-pdf-to-html/_index.md Co-authored-by: Andriy Andruhovski --- en/python-net/converting/convert-pdf-to-html/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-pdf-to-html/_index.md b/en/python-net/converting/convert-pdf-to-html/_index.md index 58ec2853a..46fc09118 100644 --- a/en/python-net/converting/convert-pdf-to-html/_index.md +++ b/en/python-net/converting/convert-pdf-to-html/_index.md @@ -177,7 +177,7 @@ This snippet converts a PDF into HTML format, embedding raster images as PNG pag ### Convert PDF to Body-Only content HTML page -This function converts a PDF into HTML format, generating only the 'body' content without extra 'html' or 'head' tags, and splits the output into separate pages. +This function converts a PDF into HTML format, generating 'body-only' content without extra 'html' or 'head' tags, and splits the output into separate pages. 1. Load the PDF document using 'apdf.Document'. 1. 
Create 'HtmlSaveOptions' and configure: From 54af07347d0ecd63a356206c9363b7d909bdac6e Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Mon, 22 Sep 2025 10:13:44 +0300 Subject: [PATCH 45/65] Update en/python-net/converting/convert-pdf-to-html/_index.md Co-authored-by: Andriy Andruhovski --- en/python-net/converting/convert-pdf-to-html/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-pdf-to-html/_index.md b/en/python-net/converting/convert-pdf-to-html/_index.md index 46fc09118..df877a0bc 100644 --- a/en/python-net/converting/convert-pdf-to-html/_index.md +++ b/en/python-net/converting/convert-pdf-to-html/_index.md @@ -54,7 +54,7 @@ Aspose.PDF for Python presents you online free application ["PDF to HTML"](https print(infile + " converted into " + outfile) ``` -### Convert PDF to HTML with Stored Images +### Convert PDF to HTML with saving images in the specified folder This function converts a PDF file into HTML format using Aspose.PDF for Python via .NET. All extracted images are stored in a specified folder instead of being embedded in the HTML file. 
From 578a9c68c26707e31d54f899f68664f81d6ddb9f Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Mon, 22 Sep 2025 10:44:33 +0300 Subject: [PATCH 46/65] Refactor documentation for PDF conversion in Python --- .../converting/convert-html-to-pdf/_index.md | 59 ++++--- .../convert-images-format-to-pdf/_index.md | 149 +++++------------- .../convert-other-files-to-pdf/_index.md | 135 +++++++++------- .../converting/convert-pdf-to-excel/_index.md | 98 +----------- .../converting/convert-pdf-to-html/_index.md | 42 ++--- .../convert-pdf-to-images-format/_index.md | 71 ++------- .../convert-pdf-to-other-files/_index.md | 23 +-- .../convert-pdf-to-other-files/pdf_to_xml.png | Bin 28069 -> 0 bytes .../convert-pdf-to-powerpoint/_index.md | 47 +----- .../converting/convert-pdf-to-word/_index.md | 61 +------ 10 files changed, 186 insertions(+), 499 deletions(-) delete mode 100644 en/python-net/converting/convert-pdf-to-other-files/pdf_to_xml.png diff --git a/en/python-net/converting/convert-html-to-pdf/_index.md b/en/python-net/converting/convert-html-to-pdf/_index.md index b18f7dffd..9a9377331 100644 --- a/en/python-net/converting/convert-html-to-pdf/_index.md +++ b/en/python-net/converting/convert-html-to-pdf/_index.md @@ -14,17 +14,6 @@ AlternativeHeadline: How to convert HTML to PDF using Aspose.PDF for Python Abstract: Aspose.PDF for Python via .NET offers a robust solution for creating PDF files from web pages and raw HTML code within applications. This article provides a guide on converting HTML to PDF using Python, outlining the use of Aspose.PDF for Python, a PDF manipulation API that enables seamless conversion of HTML documents to PDF format. The conversion process can be customized as needed. 
The article includes a Python code sample demonstrating the conversion process, which involves creating an instance of the HtmlLoadOptions class, initializing a Document object, and saving the output PDF document using the Document.Save() method. Additionally, Aspose offers an online tool for converting HTML to PDF, allowing users to explore the functionality and quality of the conversion process. --- -## Overview - -Aspose.PDF for Python via .NET is a professional solution that allows you to create PDF files from web pages and raw HTML code in your applications. - -This article explains how to **convert HTML to PDF using Python**. It covers the following topics. - -_Format_: **HTML** -- [Python HTML to PDF](#python-html-to-pdf) -- [Python Convert HTML to PDF](#python-html-to-pdf) -- [Python How to convert HTML to PDF](#python-html-to-pdf) - ## Python HTML to PDF Conversion **Aspose.PDF for Python** is a PDF manipulation API that lets you convert any existing HTML documents to PDF seamlessly. The process of converting HTML to PDF can be flexibly customized. @@ -64,9 +53,11 @@ Aspose presents you online free application ["HTML to PDF"](https://products.asp ## Convert HTML to PDF media type -1. Create an instance of the [HtmlLoadOptions()](https://reference.aspose.com/pdf/python-net/aspose.pdf/htmlloadoptions/) class. 'html_media_type' applies CSS rules intended for on-screen display. -1. Load and convert HTML. -1. Save output PDF document by calling **document.save()** method. +This example shows how to convert an HTML file to PDF using Aspose.PDF for Python via .NET with specific rendering options. + +1. Create an instance of the [HtmlLoadOptions()](https://reference.aspose.com/pdf/python-net/aspose.pdf/htmlloadoptions/) class. The 'html_media_type' applies CSS rules intended for on-screen display. The 'html_media_type' property can have multiple values. You can set it to HtmlMediaType.SCREEN or HtmlMediaType.PRINT. +1. 
Load the HTML into an ap.Document using the load options. +1. Save the document as a PDF. ```python @@ -87,9 +78,12 @@ Aspose presents you online free application ["HTML to PDF"](https://products.asp ## Convert HTML to PDF priority CSS Page Rule +Some documents may contain layout settings that utilize the Page rule, which can create ambiguity when generating the layout. You can control the priority using the 'is_priority_css_page_rule' property. If this property is set to 'True', the CSS rule is applied first. + 1. Create an instance of the [HtmlLoadOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/htmlloadoptions/) class. -1. Optionally sets how CSS is applied. The commented line 'is_priority_css_page_rule' can override how CSS rules affect page layout. -1. Converts and saves it as a PDF. +1. Set 'is_priority_css_page_rule = False' to disable prioritizing @page CSS rules, allowing other styles to take precedence. +1. Load the HTML into an ap.Document with the configured options. +1. Save the document as a PDF. ```python @@ -108,11 +102,14 @@ Aspose presents you online free application ["HTML to PDF"](https://products.asp print(infile + " converted into " + outfile) ``` -## Convert HTML to PDF Embed Fonts +## Convert HTML to PDF with Embedded Fonts + +This example shows how to convert an HTML file to PDF while embedding fonts. If you need a PDF document with Embedded Fonts, you should set 'is_embed_fonts' to True. -1. Loads an HTML file. -1. Embeds fonts into the resulting PDF to preserve appearance across devices. -1. Saves the PDF to a specified output location. +1. Create 'HtmlLoadOptions()' to configure HTML to PDF conversion. +1. Set 'is_embed_fonts = True' to ensure that all fonts used in the HTML are embedded directly into the PDF, preserving visual fidelity. +1. Load the HTML into an ap.Document with these options. +1. Save the document as a PDF. 
```python @@ -131,13 +128,15 @@ Aspose presents you online free application ["HTML to PDF"](https://products.asp print(infile + " converted into " + outfile) ``` -## Convert HTML to PDF Render Content to same Page +## Render Content on single Page during HTML to PDF conversion -Aspose.PDF for Python via .NET to convert an HTML file into a single-page PDF. +This example demonstrates how to convert an HTML file into a single-page PDF using Aspose.PDF for Python. +You can display all content on one page using the 'is_render_to_single_page' property. -1. Loads an HTML file. -1. Configures Aspose to render everything on a single PDF page. -1. Saves the PDF to a specified location. +1. Create an instance of 'HtmlLoadOptions()' to configure the conversion process. +1. Enable 'is_render_to_single_page' to render the entire HTML content onto a single continuous PDF page. +1. Load the document with the configured options into an 'ap.Document'. +1. Save the result as a PDF file. ```python @@ -158,12 +157,12 @@ Aspose.PDF for Python via .NET to convert an HTML file into a single-page PDF. ## Convert MHTML to PDF -Loads an MHT (MHTML) file from disk and converts it into a PDF using Aspose.PDF for Pyhton via .NET. Allows specifying custom page dimensions to ensure a consistent layout. With this operation, you can support embedded HTML, CSS, and images in MHT files, as well as Configurable page width and height, and save the converted document to a specified output folder. +This example shows how to convert an MHT (MHTML) file into a PDF document using Aspose.PDF for Python with specific page dimensions. -1. Loads an MHT (MHTML) file. -1. Sets the page size for the converted document. -1. Converts the content into PDF (or another format) using Aspose.PDF. -1. Saves the file to a specific folder and prints a confirmation. +1. Create an instance of ap.MhtLoadOptions() to configure MHT file processing. +1. Set various parameters, such as page size. +1. 
Initialize the document with the input file and configured loading options. +1. Save the resulting document as a PDF. ```python diff --git a/en/python-net/converting/convert-images-format-to-pdf/_index.md b/en/python-net/converting/convert-images-format-to-pdf/_index.md index a425e0e7d..f74e8342b 100644 --- a/en/python-net/converting/convert-images-format-to-pdf/_index.md +++ b/en/python-net/converting/convert-images-format-to-pdf/_index.md @@ -11,53 +11,9 @@ sitemap: priority: 0.5 TechArticle: true AlternativeHeadline: How to Convert Images to PDF in Python -Abstract: This article provides a comprehensive guide on converting various image formats to PDF using Python, specifically leveraging the Aspose.PDF library for Python via .NET. The article covers a range of image formats including BMP, CGM, DICOM, EMF, GIF, PNG, SVG, and TIFF. Each section details the steps required to perform the conversion, providing code snippets to illustrate the process. For example, converting BMP to PDF involves creating a new PDF document, defining image placement, inserting the image, and saving the document. Similarly, for formats like CGM, DICOM, and others, specific load options and processing steps are outlined. The article also highlights the advantages of using Aspose.PDF for such tasks, such as its support for different encoding methods and the ability to process both single-frame and multi-frame images. Additionally, it references online tools provided by Aspose for users to try out these conversions without code. Each format is discussed with its unique characteristics and requirements for conversion, providing a +Abstract: This article provides a comprehensive guide on converting various image formats to PDF using Python, specifically leveraging the Aspose.PDF library for Python via .NET. The article covers a range of image formats including BMP, CGM, DICOM, EMF, GIF, PNG, SVG, and TIFF. 
Each section details the steps required to perform the conversion, providing code snippets to illustrate the process. For example, converting BMP to PDF involves creating a new PDF document, defining image placement, inserting the image, and saving the document. Similarly, for formats like CGM, DICOM, and others, specific load options and processing steps are outlined. The article also highlights the advantages of using Aspose.PDF for such tasks, such as its support for different encoding methods and the ability to process both single-frame and multi-frame images. --- -## Overview - -This article explains how to convert various Images formats to PDF using Python. It covers these topics. - -_Format_: **BMP** -- [Python BMP to PDF](#python-bmp-to-pdf) -- [Python Convert BMP to PDF](#python-bmp-to-pdf) -- [Python How to convert BMP image to PDF](#python-bmp-to-pdf) - -_Format_: **CGM** -- [Python CGM to PDF](#python-cgm-to-pdf) -- [Python Convert CGM to PDF](#python-cgm-to-pdf) -- [Python How to convert CGM image to PDF](#python-cgm-to-pdf) - -_Format_: **DICOM** -- [Python DICOM to PDF](#python-dicom-to-pdf) -- [Python Convert DICOM to PDF](#python-dicom-to-pdf) -- [Python How to convert DICOM image to PDF](#python-dicom-to-pdf) - -_Format_: **EMF** -- [Python EMF to PDF](#python-emf-to-pdf) -- [Python Convert EMF to PDF](#python-emf-to-pdf) -- [Python How to convert EMF image to PDF](#python-emf-to-pdf) - -_Format_: **GIF** -- [Python GIF to PDF](#python-gif-to-pdf) -- [Python Convert GIF to PDF](#python-gif-to-pdf) -- [Python How to convert GIF image to PDF](#python-gif-to-pdf) - -_Format_: **PNG** -- [Python PNG to PDF](#python-png-to-pdf) -- [Python Convert PNG to PDF](#python-png-to-pdf) -- [Python How to convert PNG image to PDF](#python-png-to-pdf) - -_Format_: **SVG** -- [Python SVG to PDF](#python-svg-to-pdf) -- [Python Convert SVG to PDF](#python-svg-to-pdf) -- [Python How to convert SVG image to PDF](#python-svg-to-pdf) - -_Format_: **TIFF** -- [Python 
TIFF to PDF](#python-tiff-to-pdf) -- [Python Convert TIFF to PDF](#python-tiff-to-pdf) -- [Python How to convert TIFF image to PDF](#python-tiff-to-pdf) - ## Python Images to PDF Conversions **Aspose.PDF for Python via .NET** allows you to convert different formats of images to PDF files. Our library demonstrates code snippets for converting the most popular image formats, such as - BMP, CGM, DICOM, EMF, JPG, PNG, SVG and TIFF formats. @@ -66,16 +22,16 @@ _Format_: **TIFF** Convert BMP files to PDF document using **Aspose.PDF for Python via .NET** library. -BMP images are Files having extension. BMP represent Bitmap Image files that are used to store bitmap digital images. These images are independent of graphics adapter and are also called device independent bitmap (DIB) file format. +BMP images are Files having extension. BMP represent Bitmap Image files that are used to store bitmap digital images. These images are independent of graphics adapter and are also called device independent bitmap (DIB) file format. You can convert BMP to PDF files with Aspose.PDF for Python via .NET API. Therefore, you can follow the following steps to convert BMP images: Steps to Convert BMP to PDF in Python: -1. Creates an empty PDF document. -1. Adds a new A4-sized page. +1. Create an empty PDF document. +1. Create the page you need, for example, we created A4, but you can specify your own format. 1. Places the image (from infile) inside the page using the defined rectangle. -1. Saves the document as PDF. +1. Save the document as PDF. So the following code snippet follows these steps and shows how to convert BMP to PDF using Python: @@ -109,9 +65,9 @@ Aspose presents you online free application ["BMP to PDF"](https://products.aspo ## Convert CGM to PDF -Converts a CGM (Computer Graphics Metafile) into PDF (or another supported format) using Aspose.PDF for Python via .NET. 
+Convert a CGM (Computer Graphics Metafile) into PDF (or another supported format) using Aspose.PDF for Python via .NET. -CGM is a file extension for a Computer Graphics Metafile format commonly used in CAD (computer-aided design) and presentation graphics applications. CGM is a vector graphics format that supports three different encoding methods: binary (best for program read speed), character-based (produces the smallest file size and allows for faster data transfers) or cleartext encoding (allows users to read and modify the file with a text editor). +CGM is a file extension for a Computer Graphics Metafile format commonly used in CAD (computer-aided design) and presentation graphics applications. CGM is a vector graphics format that supports three different encoding methods: binary (best for program read speed), character-based (produces the smallest file size and allows for faster data transfers) or cleartext encoding (allows users to read and modify the file with a text editor). Check next code snippet for converting CGM files to PDF format. @@ -144,7 +100,7 @@ Steps to Convert CGM to PDF in Python: ## Convert DICOM to PDF -DICOM format is the medical industry standard for the creation, storage, transmission, and visualization of digital medical images and documents of examined patients. +DICOM format is the medical industry standard for the creation, storage, transmission, and visualization of digital medical images and documents of examined patients. **Aspose.PDF for Python** allows you to convert DICOM and SVG images, but for technical reasons to add images you need to specify the type of file to be added to PDF. @@ -152,8 +108,23 @@ The following code snippet shows how to convert DICOM files to PDF format with A 1. Load the DICOM file. 1. Extract image dimensions. -1. Print image size. 1. Create a new PDF document. + +We use the additional pydicom library to determine the dimensions of the image. 
If you simply want to place the image on the page, you can skip this code. + +```python + + # Load the DICOM file + dicom_file = pydicom.dcmread(path_infile) + + # Get the dimensions of the image + rows = dicom_file.Rows + columns = dicom_file.Columns + + # Print the dimensions + print(f"DICOM image size: {rows} x {columns} pixels") +``` + 1. Prepare the DICOM image for PDF. 1. Set PDF page size and margins. 1. Add the image to the page. @@ -215,7 +186,7 @@ Aspose presents you online free application ["DICOM to PDF"](https://products.as ## Convert EMF to PDF -EMF stores graphical images device-independently. Metafiles of EMF comprises of variable-length records in chronological order that can render the stored image after parsing on any output device. +EMF stores graphical images device-independently. Metafiles of EMF comprises of variable-length records in chronological order that can render the stored image after parsing on any output device. The following code snippet shows how to convert an EMF to PDF with Python: @@ -255,7 +226,7 @@ Aspose presents you online free application ["EMF to PDF"](https://products.aspo Convert GIF files to PDF document using **Aspose.PDF for Python via .NET** library. -GIF is able to store compressed data without loss of quality in a format of no more than 256 colors. The hardware-independent GIF format was developed in 1987 (GIF87a) by CompuServe for transmitting bitmap images over networks. +GIF is able to store compressed data without loss of quality in a format of no more than 256 colors. The hardware-independent GIF format was developed in 1987 (GIF87a) by CompuServe for transmitting bitmap images over networks. 
So the following code snippet follows these steps and shows how to convert BMP to PDF using Python: @@ -293,14 +264,14 @@ Aspose presents you online free application ["GIF to PDF"](https://products.aspo **Aspose.PDF for Python via .NET** support feature to convert PNG images to PDF format. 
Check the next code snippet for realizing you task. -PNG refers to a type of raster image file format that use loseless compression, that makes it popular among its users. +PNG refers to a type of raster image file format that uses lossless compression, which makes it popular among its users. You can convert PNG to PDF image using the below steps: -1. Create a New PDF Document -1. Define Image Placement -1. Save the PDF -1. Print Conversion Message +1. Create a New PDF Document. +1. Define Image Placement. +1. Save the PDF. +1. Print Conversion Message. Moreover, the code snippet below shows how to convert PNG to PDF with Python: @@ -336,7 +307,7 @@ Aspose presents you online free application ["PNG to PDF"](https://products.aspo ## Convert SVG to PDF -**Aspose.PDF for Python via .NET** explains how to convert SVG images to PDF format and how to get dimensions of the source SVG file. +**Aspose.PDF for Python via .NET** explains how to convert SVG images to PDF format and how to get dimensions of the source SVG file. Scalable Vector Graphics (SVG) is a family of specifications of an XML-based file format for two-dimensional vector graphics, both static and dynamic (interactive or animated). The SVG specification is an open standard that has been under development by the World Wide Web Consortium (W3C) since 1999. @@ -374,7 +345,7 @@ The following code snippet shows the process of converting SVG file into PDF for ## Convert TIFF to PDF -**Aspose.PDF** file format supported, be it a single frame or multi-frame TIFF image. It means that you can convert the TIFF image to PDF. +**Aspose.PDF** file format supported, be it a single frame or multi-frame TIFF image. It means that you can convert the TIFF image to PDF. TIFF or TIF, Tagged Image File Format, represents raster images that are meant for usage on a variety of devices that comply with this file format standard. TIFF image can contain several frames with different images. 
Aspose.PDF file format is also supported, be it a single frame or multi-frame TIFF image. @@ -404,11 +375,11 @@ You can convert TIFF to PDF in the same manner as the rest raster file formats g ## Convert CDR to PDF -Next code snippet shows how to load a CorelDRAW (CDR) file and save it as a PDF using 'CdrLoadOptions' in Aspose.PDF for Python via .NET. +Following code snippet shows how to load a CorelDRAW (CDR) file and save it as a PDF using 'CdrLoadOptions' in Aspose.PDF for Python via .NET. -1. Initialize load options for CDR format. -1. Load CDR file into Document object. -1. Save the document in PDF format. +1. Create 'CdrLoadOptions()' to configure how the CDR file should be loaded. +1. Initialize a Document object with the CDR file and load options. +1. Save the document as a PDF. ```python @@ -432,7 +403,7 @@ Next code snippet shows how to load a CorelDRAW (CDR) file and save it as a PDF ## Convert JPEG to PDF -This example shows how to create a new PDF document, add a blank A4-sized page, and insert an image into it using Aspose.PDF for Python via .NET. +This example shows how to convert Jpeg to PDF file using Aspose.PDF for Python via .NET. 1. Create a new PDF document. 1. Add a new page. @@ -461,47 +432,3 @@ This example shows how to create a new PDF document, add a blank A4-sized page, document.save(path_outfile) print(infile + " converted into " + outfile) ``` - -## See Also - -This article also covers these topics. The codes are same as above. 
- -_Format_: **BMP** -- [Python BMP to PDF Code](#python-bmp-to-pdf) -- [Python BMP to PDF API](#python-bmp-to-pdf) -- [Python BMP to PDF Programmatically](#python-bmp-to-pdf) -- [Python BMP to PDF Library](#python-bmp-to-pdf) -- [Python Save BMP as PDF](#python-bmp-to-pdf) -- [Python Generate PDF from BMP](#python-bmp-to-pdf) -- [Python Create PDF from BMP](#python-bmp-to-pdf) -- [Python BMP to PDF Converter](#python-bmp-to-pdf) - -_Format_: **CGM** -- [Python CGM to PDF Code](#python-cgm-to-pdf) -- [Python CGM to PDF API](#python-cgm-to-pdf) -- [Python CGM to PDF Programmatically](#python-cgm-to-pdf) -- [Python CGM to PDF Library](#python-cgm-to-pdf) -- [Python Save CGM as PDF](#python-cgm-to-pdf) -- [Python Generate PDF from CGM](#python-cgm-to-pdf) -- [Python Create PDF from CGM](#python-cgm-to-pdf) -- [Python CGM to PDF Converter](#python-cgm-to-pdf) - -_Format_: **DICOM** -- [Python DICOM to PDF Code](#python-dicom-to-pdf) -- [Python DICOM to PDF API](#python-dicom-to-pdf) -- [Python DICOM to PDF Programmatically](#python-dicom-to-pdf) -- [Python DICOM to PDF Library](#python-dicom-to-pdf) -- [Python Save DICOM as PDF](#python-dicom-to-pdf) -- [Python Generate PDF from DICOM](#python-dicom-to-pdf) -- [Python Create PDF from DICOM](#python-dicom-to-pdf) -- [Python DICOM to PDF Converter](#python-dicom-to-pdf) - -_Format_: **EMF** -- [Python EMF to PDF Code](#python-emf-to-pdf) -- [Python EMF to PDF API](#python-emf-to-pdf) -- [Python EMF to PDF Programmatically](#python-emf-to-pdf) -- [Python EMF to PDF Library](#python-emf-to-pdf) -- [Python Save EMF as PDF](#python-emf-to-pdf) -- [Python Generate PDF from EMF](#python-emf-to-pdf) -- [Python Create PDF from EMF](#python-emf-to-pdf) -- [Python EMF to PDF Converter](#python-emf-to-pdf) diff --git a/en/python-net/converting/convert-other-files-to-pdf/_index.md b/en/python-net/converting/convert-other-files-to-pdf/_index.md index d44bb5e5b..8e2091a25 100644 --- 
a/en/python-net/converting/convert-other-files-to-pdf/_index.md +++ b/en/python-net/converting/convert-other-files-to-pdf/_index.md @@ -14,53 +14,11 @@ AlternativeHeadline: How to Convert other file formats to PDF in Python Abstract: This article provides a comprehensive guide on converting various file formats to PDF using Python, leveraging the capabilities of Aspose.PDF for Python via .NET. The document outlines conversion processes for several formats, including EPUB, Markdown, PCL, Text, XPS, PostScript, XML, XSL-FO, and LaTeX/TeX. Each section provides specific code snippets and instructions for implementing these conversions. The article emphasizes the utility of Aspose.PDF's features, such as load options tailored for each file type, to ensure accurate and efficient conversion. Additionally, it highlights the availability of free online conversion applications for users to explore the functionality firsthand. The guide serves as a practical resource for developers seeking to integrate PDF conversion capabilities into their Python applications. --- -## Overview - This article explains how to **convert various other types of file formats to PDF using Python**. It covers the following topics. 
-_Format_: **OFD** -- [Python OFD to PDF](#python-convert-ofd-to-pdf) -- [Python Convert OFD to PDF](#python-convert-ofd-to-pdf) -- [Python How to convert OFD file to PDF](#python-convert-ofd-to-pdf) - -_Format_: **EPUB** -- [Python EPUB to PDF](#python-convert-epub-to-pdf) -- [Python Convert EPUB to PDF](#python-convert-epub-to-pdf) -- [Python How to convert EPUB file to PDF](#python-convert-epub-to-pdf) - -_Format_: **Markdown** -- [Python Markdown to PDF](#python-convert-markdown-to-pdf) -- [Python Convert Markdown to PDF](#python-convert-markdown-to-pdf) -- [Python How to convert Markdown file to PDF](#python-convert-markdown-to-pdf) - -_Format_: **MD** -- [Python MD to PDF](#python-convert-md-to-pdf) -- [Python Convert MD to PDF](#python-convert-md-to-pdf) -- [Python How to convert MD file to PDF](#python-convert-md-to-pdf) - -_Format_: **PCL** -- [Python PCL to PDF](#python-convert-pcl-to-pdf) -- [Python Convert PCL to PDF](#python-convert-pcl-to-pdf) -- [Python How to convert PCL file to PDF](#python-convert-pcl-to-pdf) - -_Format_: **Text** -- [Python Text to PDF](#python-convert-text-to-pdf) -- [Python Convert Text to PDF](#python-convert-text-to-pdf) -- [Python How to convert Text file to PDF](#python-convert-text-to-pdf) - -_Format_: **TXT** -- [Python TXT to PDF](#python-convert-txt-to-pdf) -- [Python Convert TXT to PDF](#python-convert-txt-to-pdf) -- [Python How to convert TXT file to PDF](#python-convert-txt-to-pdf) - -_Format_: **XPS** -- [Python XPS to PDF](#python-convert-xps-to-pdf) -- [Python Convert XPS to PDF](#python-convert-xps-to-pdf) -- [Python How to convert XPS file to PDF](#python-convert-xps-to-pdf) - ## Convert OFD to PDF -OFD stands for Open Fixed-layout Document (sometimes called Open Fixed Document format). It is a Chinese national standard (GB/T 33190-2016) for electronic documents, introduced as an alternative to PDF. +OFD stands for Open Fixed-layout Document (also called Open Fixed Document format). 
It is a Chinese national standard (GB/T 33190-2016) for electronic documents, introduced as an alternative to PDF. Steps Convert OFD to PDF in Python: @@ -83,9 +41,11 @@ Steps Convert OFD to PDF in Python: print(infile + " converted into " + outfile) ``` -## Convert LaTeX/TeX to PDF +## Convert LaTeX/TeX to PDF + +The LaTeX file format is a text file format with markup in the LaTeX derivative of the TeX family of languages and LaTeX is a derived format of the TeX system. LaTeX (ˈleɪtɛk/lay-tek or lah-tek) is a document preparation system and document markup language. It is widely used for the communication and publication of scientific documents in many fields, including mathematics, physics, and computer science. It also has a prominent role in the preparation and publication of books and articles that contain complex multilingual materials, such as Korean, Japanese, and Chinese characters -The LaTeX file format is a text file format with markup in the LaTeX derivative of the TeX family of languages and LaTeX is a derived format of the TeX system. LaTeX (ˈleɪtɛk/lay-tek or lah-tek) is a document preparation system and document markup language. It is widely used for the communication and publication of scientific documents in many fields, including mathematics, physics, and computer science. It also has a prominent role in the preparation and publication of books and articles that contain complex multilingual materials, such as Sanskrit and Arabic, including critical editions. LaTeX uses the TeX typesetting program for formatting its output, and is itself written in the TeX macro language. + and Arabic, including specific editions. LaTeX uses the TeX typesetting program for formatting its output, and is itself written in the TeX macro language. 
{{% alert color="success" %}} **Try to convert LaTeX/TeX to PDF online** @@ -120,7 +80,7 @@ Steps Convert TEX to PDF in Python: **Aspose.PDF for Python via .NET** allows you simply convert EPUB files to PDF format. 
-EPUB (short for electronic publication) is a free and open e-book standard from the International Digital Publishing Forum (IDPF). Files have the extension .epub. EPUB is designed for reflowable content, meaning that an EPUB reader can optimize text for a particular display device. +EPUB (short for electronic publication) is a free and open e-book standard from the International Digital Publishing Forum (IDPF). Files have the extension .epub. EPUB is designed for reflowable content, meaning that an EPUB reader can optimize text for a particular display device. EPUB also supports fixed-layout content. The format is intended as a single format that publishers and conversion houses can use in-house, as well as for distribution and sale. It supersedes the Open eBook standard.The version EPUB 3 is also endorsed by the Book Industry Study Group (BISG), a leading book trade association for standardized best practices, research, information and events, for packaging of content. @@ -187,7 +147,7 @@ The following code snippet shows how to use this functionality with Aspose.PDF l ## Convert PCL to PDF -PCL (Printer Command Language) is a Hewlett-Packard printer language developed to access standard printer features. PCL levels 1 through 5e/5c are command based languages using control sequences that are processed and interpreted in the order they are received. At a consumer level, PCL data streams are generated by a print driver. PCL output can also be easily generated by custom applications. +PCL (Printer Command Language) is a Hewlett-Packard printer language developed to access standard printer features. PCL levels 1 through 5e/5c are command based languages using control sequences that are processed and interpreted in the order they are received. At a consumer level, PCL data streams are generated by a print driver. PCL output can also be easily generated by custom applications. 
{{% alert color="success" %}} **Try to convert PCL to PDF online** @@ -224,7 +184,7 @@ Steps Convert PCL to PDF in Python: print(infile + " converted into " + outfile) ``` -## Convert Text to PDF +## Convert Preformatted Text to PDF **Aspose.PDF for Python via .NET** support the feature converting plain text and pre-formatted text file to PDF format. @@ -240,12 +200,12 @@ Aspose.PDF for Python via .NET presents you online free application ["Text to PD Steps Convert TEXT to PDF in Python: -1. Read the text file. -1. Set up font. -1. Create PDF and first page. -1. Page formatting. -1. Loop through lines. -1. Save PDF. +1. Read the input text file line by line. +1. Set up a monospaced font (Courier New) for consistent text alignment. +1. Create a new PDF Document and add the first page with custom margins and font settings. +1. Iterate through the lines of the text file. To simulate a typewriter, we use the 'monospace_font' font and size 12. +1. Limit page creation to 4 pages. +1. Save the final PDF to the specified path. ```python @@ -314,7 +274,7 @@ The following code snippet shows the process of converting XPS file into PDF for {{% alert color="success" %}} **Try to convert XPS format to PDF online** -Aspose.PDF for Python via .NET presents you online free appliPostScriptcation ["XPS to PDF"](https://products.aspose.app/pdf/conversion/xps-to-pdf/), where you may try to investigate the functionality and quality it works. +Aspose.PDF for Python via .NET presents you online free application ["XPS to PDF"](https://products.aspose.app/pdf/conversion/xps-to-pdf/), where you may try to investigate the functionality and quality it works. [![Aspose.PDF Convertion XPS to PDF with Free App](xps_to_pdf.png)](https://products.aspose.app/pdf/conversion/xps-to-pdf/) {{% /alert %}} @@ -341,3 +301,68 @@ Following code snippet can be used to convert a XSLFO to PDF format with Aspose. 
print(xmlfile + " converted into " + outfile) ``` + +## Convert PostScript to PDF + +This example demonstrates how to convert a PostScript file into a PDF document using Aspose.PDF for Python via .NET. + +1. Create an instance of 'PsLoadOptions' to correctly interpret the PS file. +1. Load the 'PostScript' file into a Document object using the load options. +1. Save the document in PDF format to the desired output path. + +```python + + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + + load_options = ap.PsLoadOptions() + + document = ap.Document(path_infile, load_options) + document.save(path_outfile) + + print(infile + " converted into " + outfile) +``` + +## Convert XML with XSLT to PDF + +This example demonstrates how to convert an XML file into a PDF by first transforming it into HTML using an XSLT template and then loading the HTML into Aspose.PDF. + +1. Create an instance of 'HtmlLoadOptions' to configure HTML-to-PDF conversion. +1. Load the transformed HTML file into an Aspose.PDF Document object. +1. Save the document as a PDF at the specified output path. +1. Remove the temporary HTML file after successful conversion. 
+ +```python + + from os import path + import aspose.pdf as ap + + path_infile = path.join(self.data_dir, infile) + path_outfile = path.join(self.data_dir, "python", outfile) + path_template = path.join(self.data_dir, template) + path_temp_file = path.join(self.data_dir, "temp.html") + + load_options = ap.HtmlLoadOptions() + self.transform_xml_to_html(path_infile, path_template, path_temp_file) + + document = ap.Document(path_temp_file, load_options) + document.save(path_outfile) + + if path.exists(path_temp_file): + os.remove(path_temp_file) + + print(infile + " converted into " + outfile) + + xml_doc = etree.parse(xml_file) + + xslt_doc = etree.parse(xslt_file) + transform = etree.XSLT(xslt_doc) + + result = transform(xml_doc) + + with open(html_file, 'w', encoding='utf-8') as f: + f.write(str(result)) +``` diff --git a/en/python-net/converting/convert-pdf-to-excel/_index.md b/en/python-net/converting/convert-pdf-to-excel/_index.md index 6432a3c42..7f7bcdd70 100644 --- a/en/python-net/converting/convert-pdf-to-excel/_index.md +++ b/en/python-net/converting/convert-pdf-to-excel/_index.md @@ -14,40 +14,6 @@ AlternativeHeadline: How to Convert PDF to Excel in Python Abstract: This article provides a comprehensive guide on converting PDF files to various Excel formats using Python, specifically with the Aspose.PDF for Python via .NET library. It details the conversion processes for XLS, XLSX, CSV, and ODS formats. The document explains the steps needed to convert PDF to XLS and XLSX, highlighting the creation of Document and ExcelSaveOptions instances, and the use of the Document.Save() method to specify output formats. The article also discusses features such as controlling the insertion of blank columns and minimizing worksheet numbers during conversion. Additionally, it provides examples of converting PDFs to single Excel worksheets and other formats like CSV and ODS, emphasizing the flexibility and functionality of Aspose.PDF. 
An online tool for PDF to XLSX conversion is also mentioned, allowing users to explore the conversion quality. The article concludes with a list of related topics and code snippets to further aid in understanding and implementing these conversions programmatically. --- -## Overview - -This article explains how to **convert PDF to Excel formats using Python**. It covers the following topics. - -_Format_: **XLS** - -- [Python PDF to XLS](#python-pdf-to-xls) -- [Python Convert PDF to XLS](#python-pdf-to-xls) -- [Python How to convert PDF file to XLS](#python-pdf-to-xls) - -_Format_: **XLSX** - -- [Python PDF to XLSX](#python-pdf-to-xlsx) -- [Python Convert PDF to XLSX](#python-pdf-to-xlsx) -- [Python How to convert PDF file to XLSX](#python-pdf-to-xlsx) - -_Format_: **Excel** - -- [Python PDF to Excel](#python-pdf-to-xlsx) -- [Python PDF to Excel XLS](#python-pdf-to-xls) -- [Python PDF to Excel XLSX](#python-pdf-to-xlsx) - -_Format_: **CSV** - -- [Python PDF to CSV](#python-pdf-to-csv) -- [Python Convert PDF to CSV](#python-pdf-to-csv) -- [Python How to convert PDF file to CSV](#python-pdf-to-csv) - -_Format_: **ODS** - -- [Python PDF to ODS](#python-pdf-to-ods) -- [Python Convert PDF to ODS](#python-pdf-to-ods) -- [Python How to convert PDF file to ODS](#python-pdf-to-ods) - ## PDF to EXCEL conversion via Python **Aspose.PDF for Python via .NET** support the feature of converting PDF files to Excel, and CSV formats. @@ -64,7 +30,7 @@ Aspose.PDF presents you online free application ["PDF to XLSX"](https://products The following code snippet shows the process for converting PDF file into XLS or XLSX format with Aspose.PDF for Python via .NET. -Steps: Convert a PDF file to an Excel (XML Spreadsheet 2003) format +Steps: Convert a PDF file to an Excel (XML Spreadsheet 2003) format 1. Load the PDF document. 1. Set up Excel save options using [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/). 
@@ -86,7 +52,7 @@ The following code snippet shows the process for converting PDF file into XLS or print(infile + " converted into " + outfile) ``` -Steps: Convert a PDF file to an XLSX format (Excel 2007+) +Steps: Convert a PDF file to an XLSX format (Excel 2007+) 1. Load the PDF document. 1. Set up Excel save options using [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/). @@ -133,7 +99,7 @@ When converting a PDF to XLS format, a blank column is added to the output file Aspose.PDF for Python via .NET shows how to convert a PDF to an Excel (.xlsx) file, with the 'minimize_the_number_of_worksheets' option enabled. -Steps: Convert PDF to XLS or XLSX Single Worksheet in Python +Steps: Convert PDF to XLS or XLSX Single Worksheet in Python 1. Load the PDF document. 1. Set up Excel save options using [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/). @@ -183,7 +149,7 @@ This Python example shows how to convert a PDF file into an Excel file in XLSM f The 'convert_pdf_to_excel_2007_csv' function performs the same operation as before, but this time the target format is CSV (Comma-Separated Values) instead of XLSM. -Steps: Convert PDF to CSV in Python +Steps: Convert PDF to CSV in Python 1. Create an instance of [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) object with the source PDF document. 1. Create an instance of [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/) with **ExcelSaveOptions.ExcelFormat.CSV** @@ -207,7 +173,7 @@ The 'convert_pdf_to_excel_2007_csv' function performs the same operation as befo ### Convert to ODS -Steps: Convert PDF to ODS in Python +Steps: Convert PDF to ODS in Python 1. Create an instance of [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) object with the source PDF document. 1. 
Create an instance of [ExcelSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/excelsaveoptions/) with **ExcelSaveOptions.ExcelFormat.ODS** @@ -230,57 +196,3 @@ Conversion to ODS format performs in the same way as all other formats. print(infile + " converted into " + outfile) ``` - -## See Also - -This article also covers these topics. The codes are same as above. - -_Format_: **Excel** -- [Python PDF to Excel Code](#python-pdf-to-xlsx) -- [Python PDF to Excel API](#python-pdf-to-xlsx) -- [Python PDF to Excel Programmatically](#python-pdf-to-xlsx) -- [Python PDF to Excel Library](#python-pdf-to-xlsx) -- [Python Save PDF as Excel](#python-pdf-to-xlsx) -- [Python Generate Excel from PDF](#python-pdf-to-xlsx) -- [Python Create Excel from PDF](#python-pdf-to-xlsx) -- [Python PDF to Excel Converter](#python-pdf-to-xlsx) - -_Format_: **XLS** -- [Python PDF to XLS Code](#python-pdf-to-xls) -- [Python PDF to XLS API](#python-pdf-to-xls) -- [Python PDF to XLS Programmatically](#python-pdf-to-xls) -- [Python PDF to XLS Library](#python-pdf-to-xls) -- [Python Save PDF as XLS](#python-pdf-to-xls) -- [Python Generate XLS from PDF](#python-pdf-to-xls) -- [Python Create XLS from PDF](#python-pdf-to-xls) -- [Python PDF to XLS Converter](#python-pdf-to-xls) - -_Format_: **XLSX** -- [Python PDF to XLSX Code](#python-pdf-to-xlsx) -- [Python PDF to XLSX API](#python-pdf-to-xlsx) -- [Python PDF to XLSX Programmatically](#python-pdf-to-xlsx) -- [Python PDF to XLSX Library](#python-pdf-to-xlsx) -- [Python Save PDF as XLSX](#python-pdf-to-xlsx) -- [Python Generate XLSX from PDF](#python-pdf-to-xlsx) -- [Python Create XLSX from PDF](#python-pdf-to-xlsx) -- [Python PDF to XLSX Converter](#python-pdf-to-xlsx) - -_Format_: **CSV** -- [Python PDF to CSV Code](#python-pdf-to-csv) -- [Python PDF to CSV API](#python-pdf-to-csv) -- [Python PDF to CSV Programmatically](#python-pdf-to-csv) -- [Python PDF to CSV Library](#python-pdf-to-csv) -- [Python Save PDF as 
CSV](#python-pdf-to-csv) -- [Python Generate CSV from PDF](#python-pdf-to-csv) -- [Python Create CSV from PDF](#python-pdf-to-csv) -- [Python PDF to CSV Converter](#python-pdf-to-csv) - -_Format_: **ODS** -- [Python PDF to ODS Code](#python-pdf-to-ods) -- [Python PDF to ODS API](#python-pdf-to-ods) -- [Python PDF to ODS Programmatically](#python-pdf-to-ods) -- [Python PDF to ODS Library](#python-pdf-to-ods) -- [Python Save PDF as ODS](#python-pdf-to-ods) -- [Python Generate ODS from PDF](#python-pdf-to-ods) -- [Python Create ODS from PDF](#python-pdf-to-ods) -- [Python PDF to ODS Converter](#python-pdf-to-ods) diff --git a/en/python-net/converting/convert-pdf-to-html/_index.md b/en/python-net/converting/convert-pdf-to-html/_index.md index 2cc2a4361..34216b89e 100644 --- a/en/python-net/converting/convert-pdf-to-html/_index.md +++ b/en/python-net/converting/convert-pdf-to-html/_index.md @@ -14,18 +14,9 @@ AlternativeHeadline: How to Convert PDF to HTML in Python Abstract: This article provides a comprehensive guide on converting PDF files to HTML using Python, specifically through the Aspose.PDF for Python via .NET library. It outlines the necessary steps to achieve this conversion programmatically, highlighting the creation of a `Document` object from the source PDF and utilizing the `HtmlSaveOptions` for saving the document in HTML format. The article includes a concise Python code snippet demonstrating the conversion process. Additionally, it introduces an online tool, Aspose.PDF's "PDF to HTML" application, for users to explore the functionality and quality of the conversion. The article is structured to cater to various related topics, ensuring a thorough understanding of using Python for PDF to HTML conversion. --- -## Overview - -This article explains how to **convert PDF to HTML using Python**. It covers these topics. 
- -_Format_: **HTML** -- [Python PDF to HTML](#python-pdf-to-html) -- [Python Convert PDF to HTML](#python-pdf-to-html) -- [Python How to convert PDF file to HTML](#python-pdf-to-html) - ## Convert PDF to HTML -**Aspose.PDF for Python via .NET** provides many features for converting various file formats into PDF documents and converting PDF files into various output formats. This article discusses how to convert a PDF file into HTML. You can use just a couple of lines of code Python for converting PDF To HTML. You may need to convert PDF to HTML if you want to create a website or add content to an online forum. One way to convert PDF to HTML is to programmatically use Python. +**Aspose.PDF for Python via .NET** provides many features for converting various file formats into PDF documents and converting PDF files into various output formats. This article discusses how to convert a PDF file into HTML. You can convert PDF to HTML with just a couple of lines of Python code. You may need to convert PDF to HTML if you want to create a website or add content to an online forum. One way to convert PDF to HTML is to do it programmatically with Python. {{% alert color="success" %}} **Try to convert PDF to HTML online** @@ -35,10 +26,10 @@ Aspose.PDF for Python presents you online free application ["PDF to HTML"](https [![Aspose.PDF Convertion PDF to HTML with Free App](pdf_to_html.png)](https://products.aspose.app/pdf/conversion/pdf-to-html) {{% /alert %}} -Steps: Convert PDF to HTML in Python +Steps: Convert PDF to HTML in Python 1. Create an instance of [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) object with the source PDF document. -2. Save it to [HtmlSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/htmlsaveoptions/) by calling [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method. +1.
Save it to [HtmlSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/htmlsaveoptions/) by calling [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method. ```python @@ -80,7 +71,7 @@ This function converts a PDF file into HTML format using Aspose.PDF for Python v This function converts a PDF file into multi-page HTML, where each PDF page is exported as a separate HTML file. This makes the output easier to navigate and reduces loading time for large PDFs. -1. Load the source PDF using 'apdf.Document'. +1. Load the source PDF using 'ap.Document'. 1. Create 'HtmlSaveOptions' and 'set split_into_pages'. 1. Save the document as HTML with pages split into separate files. 1. Print a confirmation message. @@ -104,7 +95,7 @@ This function converts a PDF file into multi-page HTML, where each PDF page is e This function converts a PDF into HTML format while storing all images as SVG files in a specified folder, instead of embedding them directly in the HTML. -1. Load the source PDF using 'apdf.Document'. +1. Load the source PDF using 'ap.Document'. 1. Create 'HtmlSaveOptions' and 'set special_folder_for_svg_images' to the target folder. 1. Save the document as HTML with external SVG images. 1. Print a confirmation message. @@ -128,7 +119,7 @@ This function converts a PDF into HTML format while storing all images as SVG fi This snippet converts a PDF into HTML format, storing all images as SVG files in a specified folder and compressing them to reduce file size. -1. Load the PDF document using 'apdf.Document'. +1. Load the PDF document using 'ap.Document'. 1. Create 'HtmlSaveOptions' and: - Set 'special_folder_for_svg_images' to store SVG images externally. - Enable 'compress_svg_graphics_if_any' to compress SVG images. @@ -155,7 +146,7 @@ This snippet converts a PDF into HTML format, storing all images as SVG files in This snippet converts a PDF into HTML format, embedding raster images as PNG page backgrounds. 
This approach preserves image quality and page layout within the HTML. -1. Load the PDF document using 'apdf.Document'. +1. Load the PDF document using 'ap.Document'. 1. Create 'HtmlSaveOptions' and 'set raster_images_saving_mode' to 'AS_EMBEDDED_PARTS_OF_PNG_PAGE_BACKGROUND'. 1. Save the document as HTML with embedded raster images. 1. Print a confirmation message. @@ -179,7 +170,7 @@ This snippet converts a PDF into HTML format, embedding raster images as PNG pag This function converts a PDF into HTML format, generating only the 'body' content without extra 'html' or 'head' tags, and splits the output into separate pages. -1. Load the PDF document using 'apdf.Document'. +1. Load the PDF document using 'ap.Document'. 1. Create 'HtmlSaveOptions' and configure: - 'html_markup_generation_mode = WRITE_ONLY_BODY_CONTENT' to generate only the 'body' content. - 'split_into_pages' to create separate HTML files for each PDF page. @@ -206,7 +197,7 @@ This function converts a PDF into HTML format, generating only the 'body' conten This function converts a PDF into HTML format, rendering all text as transparent, including shadowed texts, which preserves visual fidelity while allowing flexible styling in the output HTML. -1. Load the PDF document using 'apdf.Document'. +1. Load the PDF document using 'ap.Document'. 1. Create 'HtmlSaveOptions' and configure: - 'save_transparent_texts' to render normal text as transparent. - 'save_shadowed_texts_as_transparent_texts' to render shadowed text as transparent. @@ -233,7 +224,7 @@ This function converts a PDF into HTML format, rendering all text as transparent This function converts a PDF into HTML format, preserving document layers by converting marked content into separate layers in the output HTML. This allows layered elements (like annotations, backgrounds, and overlays) to be rendered accurately. -1. Load the PDF document using 'apdf.Document'. +1. Load the PDF document using 'ap.Document'. 1. 
Create 'HtmlSaveOptions' and enable 'convert_marked_content_to_layers' to preserve layers. 1. Save the document as HTML with layered content. 1. Print a confirmation message. @@ -253,16 +244,3 @@ This function converts a PDF into HTML format, preserving document layers by con print(infile + " converted into " + outfile) ``` -## See Also - -This article also covers these topics. The codes are same as above. - -_Format_: **HTML** -- [Python PDF to HTML Code](#python-pdf-to-html) -- [Python PDF to HTML API](#python-pdf-to-html) -- [Python PDF to HTML Programmatically](#python-pdf-to-html) -- [Python PDF to HTML Library](#python-pdf-to-html) -- [Python Save PDF as HTML](#python-pdf-to-html) -- [Python Generate HTML from PDF](#python-pdf-to-html) -- [Python Create HTML from PDF](#python-pdf-to-html) -- [Python PDF to HTML Converter](#python-pdf-to-html) diff --git a/en/python-net/converting/convert-pdf-to-images-format/_index.md b/en/python-net/converting/convert-pdf-to-images-format/_index.md index 397a57090..52ade6caf 100644 --- a/en/python-net/converting/convert-pdf-to-images-format/_index.md +++ b/en/python-net/converting/convert-pdf-to-images-format/_index.md @@ -14,45 +14,6 @@ AlternativeHeadline: How to Convert PDF to Image Formats in Python Abstract: This article provides a comprehensive guide on converting PDF files into various image formats using Python, specifically leveraging the Aspose.PDF for Python library. The document outlines methods for converting PDFs to image formats including TIFF, BMP, EMF, JPG, PNG, GIF, and SVG. Two primary conversion approaches are discussed - using the Device approach and SaveOption. The Device approach involves utilizing classes like `DocumentDevice` and `ImageDevice` for whole document or page-specific conversions. 
Detailed steps and Python code examples are provided for converting PDF pages to different formats such as TIFF using `TiffDevice`, and BMP, EMF, JPEG, PNG, and GIF using respective device classes (`BmpDevice`, `EmfDevice`, `JpegDevice`, `PngDevice`, `GifDevice`). For SVG conversion, the `SvgSaveOptions` class is introduced. The article also highlights online tools for trying these conversions. --- -## Overview - -This article explains how to convert PDF to different image formats using Python. It covers the following topics. - -_Image Format_: **TIFF** -- [Python PDF to TIFF](#python-pdf-to-tiff) -- [Python Convert PDF to TIFF](#python-pdf-to-tiff) -- [Python Convert Single or Particular Pages of PDF to TIFF](#python-pdf-to-tiff-pages) - -_Image Format_: **BMP** -- [Python PDF to BMP](#python-pdf-to-bmp) -- [Python Convert PDF to BMP](#python-pdf-to-bmp) -- [Python PDF to BMP Converter](#python-pdf-to-bmp) - -_Image Format_: **EMF** -- [Python PDF to EMF](#python-pdf-to-emf) -- [Python Convert PDF to EMF](#python-pdf-to-emf) -- [Python PDF to EMF Converter](#python-pdf-to-emf) - -_Image Format_: **JPG** -- [Python PDF to JPG](#python-pdf-to-jpg) -- [Python Convert PDF to JPG](#python-pdf-to-jpg) -- [Python PDF to JPG Converter](#python-pdf-to-jpg) - -_Image Format_: **PNG** -- [Python PDF to PNG](#python-pdf-to-png) -- [Python Convert PDF to PNG](#python-pdf-to-png) -- [Python PDF to PNG Converter](#python-pdf-to-png) - -_Image Format_: **GIF** -- [Python PDF to GIF](#python-pdf-to-gif) -- [Python Convert PDF to GIF](#python-pdf-to-gif) -- [Python PDF to GIF Converter](#python-pdf-to-gif) - -_Image Format_: **SVG** -- [Python PDF to SVG](#python-pdf-to-svg) -- [Python Convert PDF to SVG](#python-pdf-to-svg) -- [Python PDF to SVG Converter](#python-pdf-to-svg) - ## Python Convert PDF to Image **Aspose.PDF for Python** uses several approaches to convert PDF to image. 
Generally speaking, we use two approaches: conversion using the Device approach and conversion using SaveOption. This section will show you how to convert PDF documents to image formats such as BMP, JPEG, GIF, PNG, EMF, TIFF, and SVG formats using one of those approaches. @@ -77,12 +38,12 @@ Aspose.PDF for Python via .NET presents you online free application ["PDF to TIF Aspose.PDF for Python explain how to convert all pages in a PDF file to a single TIFF image: -Steps: Convert PDF to TIFF in Python +Steps: Convert PDF to TIFF in Python 1. Create an object of the [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) class. -2. Create [TiffSettings](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/tiffsettings/) and [TiffDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/tiffdevice/) objects -3. Call the [process](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/tiffdevice/#methods) method to convert the PDF document to TIFF. -4. To set the output file's properties, use the [TiffSettings](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/tiffsettings/) class. +1. Create [TiffSettings](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/tiffsettings/) and [TiffDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/tiffdevice/) objects +1. Call the [process](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/tiffdevice/#methods) method to convert the PDF document to TIFF. +1. To set the output file's properties, use the [TiffSettings](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/tiffsettings/) class. The following code snippet shows how to convert all the PDF pages to a single TIFF image. @@ -123,12 +84,6 @@ Let's take a look at how to convert a PDF page to an image. 
[BmpDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/bmpdevice/) class provides a method named [process](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/bmpdevice/#methods) which allows you to convert a particular page of the PDF file to BMP image format. The other classes have the same method. So, if we need to convert a PDF page to an image, we just instantiate the required class. - - - - - - The following steps and code snippet in Python shows this possibility: - [Convert PDF to BMP in Python](#python-pdf-to-image) @@ -137,16 +92,16 @@ The following steps and code snippet in Python shows this possibility: - [Convert PDF to PNG in Python](#python-pdf-to-image) - [Convert PDF to GIF in Python](#python-pdf-to-image) -Steps: PDF to Image (BMP, EMF, JPG, PNG, GIF) in Python +Steps: PDF to Image (BMP, EMF, JPG, PNG, GIF) in Python 1. Load the PDF file using [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) class. -2. Create an instance of subclass of [ImageDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/imagedevice/) i.e. +1. Create an instance of subclass of [ImageDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/imagedevice/) i.e. * [BmpDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/bmpdevice/) (to convert PDF to BMP) * [EmfDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/emfdevice/) (to convert PDF to Emf) * [JpegDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/jpegdevice/) (to convert PDF to JPG) * [PngDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/pngdevice/) (to convert PDF to PNG) * [GifDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/gifdevice/) (to convert PDF to GIF) -3. Call the [device.process()](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/imagedevice/#methods) method to perform PDF to Image conversion. +1. 
Call the [device.process()](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/imagedevice/#methods) method to perform PDF to Image conversion. ### Convert PDF to BMP @@ -219,7 +174,7 @@ The following steps and code snippet in Python shows this possibility: page_count = page_count + 1 print(infile + " converted into " + outfile) -``` +``` ### Convert PDF to PNG @@ -299,7 +254,7 @@ The following steps and code snippet in Python shows this possibility: page_count = page_count + 1 print(infile + " converted into " + outfile) -``` +``` {{% alert color="success" %}} **Try to convert PDF to PNG online** @@ -313,7 +268,7 @@ Aspose.PDF for Python presents you online free application ["PDF to PNG"](https: ## Convert PDF using SaveOptions class -This part of article shows you how to convert PDF to SVG using Python and SaveOptions class. +This part of the article shows you how to convert PDF to SVG using Python and the SaveOptions class. {{% alert color="success" %}} **Try to convert PDF to SVG online** @@ -331,11 +286,11 @@ Aspose.PDF for Python supports the feature to convert SVG image to PDF format an The following code snippet shows the steps for converting a PDF file to SVG format with Python. -Steps: Convert PDF to SVG in Python +Steps: Convert PDF to SVG in Python 1. Create an object of the [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) class. -2. Create [SvgSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/svgsaveoptions/) object with needed settings. -3. Call the [document.save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method and pass it [SvgSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/svgsaveoptions/) object convert the PDF document to SVG. +1. Create an [SvgSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/svgsaveoptions/) object with the needed settings. +1.
Call the [document.save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method and pass it the [SvgSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/svgsaveoptions/) object to convert the PDF document to SVG. ### Convert PDF to SVG diff --git a/en/python-net/converting/convert-pdf-to-other-files/_index.md b/en/python-net/converting/convert-pdf-to-other-files/_index.md index 3c4e6533d..2d251531c 100644 --- a/en/python-net/converting/convert-pdf-to-other-files/_index.md +++ b/en/python-net/converting/convert-pdf-to-other-files/_index.md @@ -24,7 +24,7 @@ Aspose.PDF for Python presents you online free application ["PDF to EPUB"](https [![Aspose.PDF Convertion PDF to EPUB with Free App](pdf_to_epub.png)](https://products.aspose.app/pdf/conversion/pdf-to-epub) {{% /alert %}} -**EPUB** is a free and open e-book standard from the International Digital Publishing Forum (IDPF). Files have the extension .epub. +**EPUB** is a free and open e-book standard from the International Digital Publishing Forum (IDPF). Files have the extension .epub. EPUB is designed for reflowable content, meaning that an EPUB reader can optimize text for a particular display device. EPUB also supports fixed-layout content. The format is intended as a single format that publishers and conversion houses can use in-house, as well as for distribution and sale. It supersedes the Open eBook standard. Aspose.PDF for Python also supports the feature to convert PDF documents to EPUB format. Aspose.PDF for Python has a class named 'EpubSaveOptions' which can be used as the second argument to [document.save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method, to generate an EPUB file. @@ -109,7 +109,7 @@ Aspose.PDF for Python presents you online free application ["PDF to Text"](https ## Convert PDF to XPS -**Aspose.PDF for Python** gives a possibility to convert PDF files to XPS format.
Let try to use the presented code snippet for converting PDF files to XPS format with Python. +**Aspose.PDF for Python** makes it possible to convert PDF files to XPS format. Let's try the presented code snippet for converting PDF files to XPS format with Python. {{% alert color="success" %}} **Try to convert PDF to XPS online** @@ -191,21 +191,4 @@ This method converts a PDF document into the MOBI (MobiXML) format, which is com document.save(path_outfile, ap.SaveFormat.MOBI_XML) print(infile + " converted into " + outfile) -``` - -## Convert PDF to XML - -{{% alert color="success" %}} -**Try to convert PDF to XML online** - -Aspose.PDF for Python presents you online free application ["PDF to XML"](https://products.aspose.app/pdf/conversion/pdf-to-xml), where you may try to investigate the functionality and quality it works. - -[![Aspose.PDF Convertion PDF to XML with Free App](pdf_to_xml.png)](https://products.aspose.app/pdf/conversion/pdf-to-xml) -{{% /alert %}} - -XML is a markup language and file format for storing, transmitting, and reconstructing arbitrary data. - -Aspose.PDF for Python also supports the feature to convert PDF documents to XML format. Aspose.PDF for Python has a class named 'XmlSaveOptions' which can be used as the second argument to [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method, to generate an XML file. -Please try using the following code snippet to accomplish this requirement with Python.
- - +``` \ No newline at end of file diff --git a/en/python-net/converting/convert-pdf-to-other-files/pdf_to_xml.png b/en/python-net/converting/convert-pdf-to-other-files/pdf_to_xml.png deleted file mode 100644 index 5f2bc80689753bcb2812b5432e4a10c80145de3a..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 28069 zcmdSAcTiJZ7dIM36hxXz5d=g5kt$t4LArD)QWBbUID}5@PLzz{xRn> zA;L>kQTp|3RSk7L14Bzo3#gM53gw$3>hkL9>g4ij z_v&ir^lI(mYVqJ=V*hgT>T2ZTd~oNYb-GnY%!motB@{R&wwc3up3Sq^uc@^o3Ua9>h#S$gg~D(E!$3>bL^j6MU<&m1t% z9LAnO$Dcv5&!D(x(1~Y`lg}KdIRAN1b2<^;Gn`JdoX&Hc&hwnk3!E;CoGwd`U6xoK zr=HmlJ+mKtX4n7Bu8+&Mm&>+?%jPeaO&6DS2bXmlmsJaw<)5d{y~4hKgnX)m97;Kz z%bq#?BDX_Q`}`4#>=ymrC6YP9P%=%OGfvYvNz=K=*>%QQcESli;!Hl|iMSAoI_C`C z<#ajVbl%}~!V|%+)Ux;G8&4g}7OV<}Ld*NVhA;a@EIax)82J~!H_L|D`o6LZ5Ymhl z5{?oQb`+8~VGvUg26MiZWCO$nze)1DSnGMhU!hYk)(Wq7e_vhpU0zL|Z_S)<4xdeQug&#OPfe~aEv|2E9iALrU2RP+o%f%N z4mGTsdZ*dC{~HkX|VN9?a}tND(&4~|%^FLlPI1sd{+;*R5NX*iCyNsie~ zj`?BLzIwj#9O{Zp`j&5I%e&4jt00C$(7h`V=n8a0JDZw1n~XXDbW>6N`t<;6a>Ds% z>g4Y?op(SWduwHR8C@^z22NemT>oLmPE*R+K=((A4=)>n)06T<-?zjX54#WR>)w+U zX(U#bctH0-!X;1Ry1?NzOTiTDQ;vbrfm}BGp{pw+idzGEe@A=p{^+JS7llTerHhhaCj+5~K({%u*%@++W6lO9pMt z7_WiuNR#;0hwT|mXxTH4@|M+9X@WotxxSN*V(a%_hCfGYcmOF^rFeMn&1=7}(h}F% z{G_#bX7VRHN=M?+5`9aDd~stc&?1V-r|q^X-_(}%?*&^>4jhLWn)K9FOsFVoh(=pe zf&A18Oul~q-noUiD=+3_>fnSKyDjdnFI(2tL?XyFq8{4T!<`)$sV*;)wzambZQ16g z;*T(J36WP(mjSLxaXrmo#T!z8gt`l&O^m9i5CZ0A zXG8-d?zP4mO+`VmqsM3Al57q$1o82RSBS7(7vBO;Dl-8%onbsYRJc1{T49$ieo(9!(Z9N_kHhs2u;DySKxg$NR!^vC$N9vRa z>+Uil5o5lTP!*Bf_p5QPR;B3ra~ zHg3ZGugkEul^qB+1U-uV=v;>?fj|Pc9%j(rTF}Lt0mI4p`|{!#wnob_G}ahi6V@Ma>>2hp%{bf)TiaC$krGq|%J z_f55LsV55l#P_;SHE&YVU!lNhY_;~QtSIeeDZ|SeZDlt>a2+QOdq68a*V+m9^LfS7 zz@)FedD5=b^~B$oDr0~HI)U{tMD`K`E_Am9)w(02Q+%!CTkP|za<7dpv+bS3JWv`7xhfsRkliCE=(`Wi2?_v;tW~-Q_4s}@~L4^ ziI!d|UHg$OR5yyH^vbuoscmS8))s*lOjoj)GaD`;kFyt>ol?U%{S7wP(F z3RKp&+j(FB_hVSuUsSS@cB4z83WYoo(8U}tAGPNQzDICDPmJuXxLS5dA>5&Fq~W*p 
zr5jMbBx1EQMzY@L7Md96DKvI4E}}DWBx`qNsO6x#K?SJ}uw&9SJX9 zU3Y#r&8te5U!h5{Ik1ZqC#OfgN7o*H=*5IfEb>*LX{jy;V)3CihGma*ETOdct?=T} z@wAHGDJJ2BiVY6H^RMvfN7$3w#lHqOqgP8O+qLOXqQIy()fZ$^{x146-u8a@`M#`* z#pDB>+R|5iaeJ1uHdtga)V4d3)*b>_8S?TLlXC!K`u$jPfJB9Inujq*XS4kC_Rg7% z=xITue45hY9!-BmG2A%&vchp;$dIgQgbKE9HpYpu8qDE7cHl7Nkqr}ec;^!kz_)i| zG-YIL9H|(Vl*nILq%}G8*Eam-B2VPuBR$?kWXiU{ui% zvKi^WeAq4}H9YXQ5X2y%&~Vt!U|S=Ww_I!J?zG|Gn3(G^c#0lw7l(;@@S?U3Zq>VO z-EidQZt5JJN?Lz(%+4P@(W6jBA%p z;T>zej+JNg(!%X0d$=ClE$))e8gR=`;7Bg}Q*|nq%TpMQzEvTHsyQfzANsvYGqxYy zt76$rFFEqjUC;1o=h)YUHgZ{eOP%EnWxsPO7%Et_-kkj1R@}FqTo-^zE;gyFd&7&i zXMcpwK52<`|2uap^}=xdgbN{Er+Dsk@tzmxaEOM^4!V{Ze6|`(?`}o_!}z6 z@oQ$~hkd6$<+OF~6D3WLfhx0R_nOk7R;eofr5_8CiugB$kD(oF#bY7(=V!XjrR+c2 z)sDK+6)1x>iYG3I9Nv<0-4tJTEhLhP8!9@nX9cgKaz?gDQD)6nw5YwBfQo(PZJCNg z*}|wbfnzpcK>}7)02QOGm3!r^j=ov;x7L;sKPGXX8)f~`v1nB4$Lh>Wpho|Nv)G0@ zCHEi3VY51#JSvEBdkU%r6b?z>$*8~ zSP|)elyp%y1x=*)IgHuMV@>KXk-qU%D9dK67}R063j64i7|(Pg_|gd@1xZr@Jgb;8oO7~DE4jP>33a` zjo`Qq<)UHVp;y?F)bV(cC7mXs4*9c&fF{A};SyFL7?&$1|Kf#LJ?4>}=aYH^TJ4yK zn1oIdlx<;Hvet#AoHT_q=7HS$#O6zqST^-xHd1+$$YT9DqVd}^0Ya3K)Yez_4AW-9{+F$-m+9}sSnrdDy;qS|5UrmC)CN0(S{YjD}>O%Pu)kkkc;7f6t;ppy^I9Xgj)? z7;W~qzy%V zo{GC;;?0B284cd)A&OGcZk0Ylcb)$2sB^l+ICiC zgtk;V#poX^$p;pa<-aVN*fz(Y#w9NeN7Pavtu? z7H8_+y}{aB6nl8L=7aI9i>1XExLS?ILa}Qv5w_3~CzDz$&G)T^>G;t)pOZ=NuqQ6| z=nn6Oc@EV28`H7NcO`ao4ogK!bJx~=9xc^M*kL-xmZEH&+IT0E{vQ0XVuBpw5-uzQ zHNK3`33Mk~xSQJZZr@Lz}A;G8PeaO z2ve84E?Iq{X8Ie&gzcKGgG2G$2auet!&*^M{iZTgT!%sZA-5+A>h{KI0WV)H+I+22r=#%!9gE-{lg0^D*mFJzQ0Yq3c^@NjI;3^MsL=bGUnqn z+78BXA64PR>8<|ow8l`Xh&~caCnyWf9+t%vZFlNR67y%Ylyk>0TKy0>vcm^#v9=y1 zl|e;&BGA`0=Ni$6M}W*bIkoiva5TRqex4g&6!+fUOG^|H6*~CGs^Ghv2>&-l!287@ zRx;}HtRa31yQxsL9RR;Lw>OCQKi^sIIvz3YUKw^hU-u3}I|jZTHy{UhTMU7jH7a2Y zeab+~`+esUbby!tOL^dEXEjuB*OUEx#QYTE1l?La;D#@1+3`}^C;`!Zs!n(|ly4T%zFo}kZa3(Zip16!JP<& zLGlvkTXX6jrrorK)9Mx0dD_5HdbdzS;0XIc@NvZy7643DCUzU*`L~<=Nd~Z^3o~c? 
zZ-;B`@qgsrI@NUnw<>8zGm+a$ko7k+_R;&hA?q^=T7XAtkyCAYqc=0V>hxxbL;B@o zn5A~_AQn-%^~A1-r>f7Ew@?n4)m%jfxXg!~-UBmcY}#l%?2gDSmq;5mF7V9RcI}*e zH~`iSMK@;*!5i>-P7aP!*lH);IuE-&wuP+klL>q2sgB&Q98>?J#R7%zHFugfA}i8U z1CcI(rt8zF#jaEXO^-KScV^G!+i#a=PuuZw$pf>|C_Vl;`vwfUu+h@7Wx8E&nYRAI zepv}T%tP+J-YkK|rYutUM-81^hbx5q6D$C~KRxf?@RoZT+*RtXwOTiVwXsm%PrHcc zNmcCLyR)0KYu#}hiqCn%BJQFzT1iWjh1~8yiUM0hrvOwiY_z^zsr!f1RQCpn3}hXJ z{zk3Xl0Nhw=Nth2VQ?Uvi7&t2JhPzZ8(T>vMUMr2CiTVC7=H5x zZ;hh9rtn)b@0K>#QSG--xUR^%eB>`$c!;#LBlHp_kPAK&7tD%j=uKp9b-u2LACo+`85|z7Axt0^7$_c$FHB6#vcMXN4;u z33c#n5{sHxs{4gBr?&$ZO=fHF=H4o=C5l~z`M)Zv3KhJ{{SWdYm=WJN`sTYWJk2w^ zvN#Z8bIM^XJiVq5a~Q=2!7Q1WrcLbi67%^t4E~*G4vA&NC~7!y(6%iv=#vs~?a@mx z8EAG83?hj}Uw`dYNegPuMuMoA|2L^jn(VAs5Lu|5;Vu#`Oeg3QHWV-V!W*>)H*qC; zE8K;488d7rW)_yC=F_hP&RW=D8?|;%z3sPH#&Kw0$i`m4snl;Fa2dIEbN-~L!uElrf3DdR z_B+{@3M}v!F?ba~NbXbRUxJ;+Ok~r2a9x6RLnZ%{b)!0OtMxSUf>NZQ$mH2K-Zd|W zW#cEK+hf=luZ-a9)+tZw9Pl!*FVBTJvXI9Q2C#799txh0)!iuaf@^m2%Qaj73E*=LPz!h<8R|Yu4!6cW`bfTB*pRs`b}=_+YzSEjr$iu$*ZV?L+o2 zKsvrukL3Ve7D0&K#}?cona1f4`%i(Onf=1M{*3uUOd(B14BBM+xPO=iPtuch^jb&e3UlIJ)c{Rkn@BVXprm<>=cR5{gd5Aj3hg6{j5mR<~uKc3Ku^6RKMthPoK zK6S;@8>>zKx<$h0_Z^@Vi?_OXVIW(b4KV6Y&fHqYIzv8?v@{sfOa1_qAc#?u9%IQu zeiTVH_T66IMllSZ@=AZAtF%B^(6yN@sJVAn=e`GASmETwBSQj%M;EmhBArSdY97&c zySfi{W@Y(Slk${5hVqH+OE|xi16O|(u!PXui4T$Fl32CdaTff+UZT$73imx+Tdl=R zA1it=P0?cx`DnTA-oON;2k&37-@5L}V{ZtT*+?yKr zdn07t19yjd3wDR3@r(AFdzdI5_sYtZO!7!8d~E3YtlYVd6s&f7wK&Vc_G$%t$MqfL z!C%P%%$A3lq}=*bv~poi;s<}m7fhSV*KSoWGmB1iel9hn7E12haTcb`h4}`zRV3bN z-Tlg79vasU)RAV%%Xv)T*6*;3=?`Fr*j zCG+|tmW(&S`e3Q-yRdjUJrH|YP?L$j6gsRb+g!25g!PNrn;O*x(UG_$yw=YJ#5ty) z>&XspzU|?c8ELf$XkA%z>8ROEU5#@jVirgZ;_2u^G@rJ7gUbK$M=h3fVv>HE!=-23R za`(n*Y(GLY{4%@Rkooh5IWdcq$SC5`hgT@`r!fHI-S`KatkXaFc(0cw`(<4$FCx!X zFms)jTAS?Y{Sn`UukI@tuTwNN85>HHz7fp?ggWgp;X)3SYZ}Y2sn{L078?y}UI%LQ zH&S6%)n~lzNRTInat`Znm`|QvHzoDn@|4I0KgQ0yLM2-aXnho3p+Dg(8l$s|@r>0p zSOY5q@^sWa2_I!Z9nuQ1EJOTEp4Vet2q`b0Kpt`h-aUyIt1&j!=Ua#fH~AgnXA@zR z@v7+D&l9q>??p13M7laDjWh!*VA3JKUC$nq@|i 
z0}UeRFQfg0=QkSeU@aTo&i6@ zc*5_rb89wQ9EsKqARkzkR`~TKR?`aW)##buQcq*|z@&3S4}I%Ui%yH_y-@v$Xxs7! zA4o`8+1%j8u84$QTE$&G*A;E!+#P%d(CwV$evV+5-0^ioQHYzoh!}Tvm}lweu%4lWTae z%G2%+Rvu620NRjxT=>JE{`qF2T3GR0b#siOWJ0DiMW!S|!asVMsHqjX?6Vjwe%j9* zI-nr6Q{u~pIM&!?9Qa-Zqp&WKfEJkfx@@eez zz;v-Y)F1^unGV=vYPyr^{LU6;jeOs=M736n&K`rG?Y1)I?fk4pSG!$^$db4qh!u0o z>+SvUrK~5gH{9!ij~om{!rckTz}D(RCrE<1&D3191%;q1rreeeC-w$}8)_`{}z9 z32PWI3xiBm+wU>??N5xYhmfKdG|X7mV@% zhxPt9!M0Bfm*flLV?|7kM-f33GF3v zI!yAbk2LpcXw!L?C2d6)o*RDma)en;Ta!nzBsW?%4hO;pX|EkL) zi7Hgt@sIxJ=bytSJK48tUS}sW>g}r_T4hTFeIarED<5sWKOEBqYEXmyOc6uyNiPk#&klV z^rb$;7V5B-hUQB!? z;ran|S9zJ<_~HU3T;6LQhCXu9x~BE1P63l15oKY-{SM>dal-huWHfcW64yCj#@EY) z-tKG&W(@|eO?5viNi~3lZ%f>&_t{F@Ovt?)%R1Smd+Gqg0QNO7dG;cyd-# z5@S^o)>S#oUD-FM1ISNQu~P25te+ZpRZ$9_m?YX&Q|yHuWqnRT@^~- zkXFkVs=3&zTHSB@vk`5Qdm?sMN-e*s2jzQ(+rMx~L;WoH6E8V7|qI$I?(x4-p%RZ4qt6Lk`ZbzIRRl7?=7`gsy=x<_%-(r+Kun6nFO zipc%i-R9SHitJmviVEnPT#~^&(w`jgk}ZR?-(H4h$qUfX_+O#la;k(HUOfM?U>iCH znRD!D{bk(Pm^FKGVu{V~x;Sxm57>Fp=(RE2yK=r;pm}~X#xKgz#B_3M&zF)e+I9W% z{IiqWxqHCC!TZGi$rvRHio-ASa&~i33>qE6es1hfr=sfT!zh=O)Y=!)w_h(Hj_E5y z+9HVoEj)sf@`V7O%t7{%X0)gDOFW+a%inI&X>6vvsa4{eRGKe86fXm5>tH9s98qaB z>QDUyEbpACAn(&DZ~m%o&Z_T>HYg}N+Ersl91QyA2;h9;UvN*q2cH0e%Tb=UA`pi+ z>OLTGh7$&*w4)mG^EayGhislIbp?JTDk$1yaz5){yhnsYahtw)AMGp2*0uYH)$Olb z{-f?Eh?Va>sB)F+G?nwHsEqI;8+;JUUT_7hG{aUi)q#Y$BS(q-mKrm?#tC@S>y-f< z`7}pg&Y&*sKW{+>P5b$M}cvtmI!{UUX;XY7`{@aJVnAk}IsEZYNdkwk6*RY4Lrp7pyYNY$Sc?}sKZb=7v=b$$; zIvL&l9wlC%NK(SxdcPkVE9no?jMX@K5gG?1nfFg?K$rM5H-5Ct(R(ncEh* zXh^0@E&I#m!+Ty?5}FT464uJgxy6r!PhAr9<4*&p5sCeVbqr3wG_&T0fM%* z@Kb{bD%oCnH~TU0?RTr|4`fN8MuYX7@xf$-@i#yb@;+a=UfzMtCn}%rv{A?RE(G(o z>yky2SLVFL$xb{Q@+1vJ?~+x6%?PlI_HjtdkTm%H!>6C9us>y^daos(|1*Fb1n+WL zz(`F3jN~mRR<+#cvpNJ7Te#b4A;YZ@oKi}EBGt?X_p^~*F+8uf4G76c85kvg}fBTCc z=$u{|h~h>f3G2w;zXQE40QR&3x&WR8M}y>>KGxMVG#0EzVr7FsB*YKQ(njgBQFM@Nz?nJe3iDL0<}I|w5;iiQk} zeud^DXNf;(!h0*jDsRHY*GbO=2+fi?GoH2g`%ofd0-G-%3DlLTPiyA z(th&tH{MPiOGntd@RWo;gn~MJUwn_$BAIr+R>CsEKzi-N_r|B^koS4zw%H^>WCsTH=+{s+IP 
z2Qc-gU(7sO5Nodv;C1^V08&*sngEqRHKKiViimFLW9yDaMXrH5!3P&=7~UWIu1AIS$4?F_wTcoAU`TmO^|*n1c9^*H-(#L-hX0+q@+Ho9+@HqJ!(md zz|Y)g9mAHaTEWu~efU8^ceV}|^v&O|=Rnpo9*uF|`AP+Pq%&$_PohS>3n{wysYMR) z9P~O5*&>(r`0Ef>8t!F?Q^f~^K!QyB=8XIfSCxKnbxmOM#vlJ1vp-ouL28jiM|L;F z6t*=TjDc5-7Fzapyr5$UE)wqK%QFdXX#dG*;rLpGI>A%WYvqR@3AFs3$slQC6ywjnU}@(BU6^4j$%3(gQ!?>gs^z!C!+X zD*viiaH7Z?Uc!>a2}va2vYDWVz@SbjoLOODQh<8^%io2rwZo5#T?1L3!j?EtUFd)= zC|n)CGZW080uBVOSf~wErOx?w-FDw=E+igjCi@+$TfXu+_LzD9?z6n<=PSIljXf(u&EjvM7JvUyAfF~K;SP+q(?O9gPQsi~eJ-#ul?fZ3D$6Wu| z+LA#ba;I945<>{WxmF+ulth&|lD6Kg7d?hex(P0#d*J@bOh*;CR_{2A#*VE~$&?5k zLjctcKEdi~i!Vwf%XHU4o$fB1pGkJMgsRu?h~{6SYgH|}sIL<;6%;W-^~acrtV*u~ zDIr^Jdz(-Od$OS<5~aQN(9;el2YV!PIT*c($CHB8s^6|9PlmuvY_`wuYt$k$qz^Rw zM%NptWh+j81q*=Gz7WQHr>HVbJo6hjnql$iwP&A}7PBuwpy#Zz1$7Q<%Ad5rjHV33 z|3GiohCmo8srT0m0su`yp`GK<=?kdC>Oh$)y@(E^e)K63==Ud*+)Xa#F|_>!P}?zu zUW1o;8R1qkj%)X;P+<7&e<>mO&hi@>c7E$QMdd;mSx$VMYY$)A^MZckuzRoB6`85?t%IRn5PoxqG=@ZAwfH8^a-`qqHu*h;ZB^bumpf_>YCr-eD z4buVI;E3ZR69xu3``15`M2SEE|68=Scg8Ww4uGekMb{24(e(c~LkuZOu{iLt_=L`@ zYIkHlxCII#rB<0#_i=Ohf@ZdScv_T#Cg$DI$^TIeDDem!7X3j*Xqd?%cs&|hdrCnqwH+6#5#Y9ulMhz@mt&UIq& z**Dw&lN6?x`w7!nep4eLI*K`fJea_8u^F!JPd&TBX%FvR8-POz^RC3i5lG3dL>pVZ zVJXQ)f=sFiWC!%u1Y1bp4LOAKx_Y!p6c*c$Lb52?;?d~u|2jZEjEzPiGeVv4hF4rI za1f{-?veO_aQ_kXwJWX?=H{jols`oML!iCKJtOF;rOwd}c!QPGo&IKR?=oYa-tXb~ zw%u5e=F@61*qIX;5)p7M6g_vc;+|NkV=Y756Yrn=-Yz0P4u?I~cec+)q6EgUjNpjn z8Tb!J^_01W3A*h3s@DXdg|`V+n$cQ83m_QuP%=+4@oG}@ zemH1eOdFsy1~fmwFXCBBs7bnjwMI}GV-|8b3?0Bw zgVfJN>k%FqxZ!nuxIM|vkT2Q1>#}a*%*iexjJKWtn*;`1Bv7Ttl|V3$5nQaPL91j| z>c;%lE+`#JPDv3wdYo21`{sIkUrOy&4stUt{LqWn^TBp@?+=<7r1os%Mc#g%zbPj? zjK?#vG67aLMmy6{25*3|ynAQ?U@#bB`lbg$!{6D3rk_1H)3ll5QN~?Lm(ljtgXS02 zCoRCfYf!vrSL+mgde6nzXYZEOe5ZI|^H@>0gxDW7bVWKmZX>i7v?`OHPJpHzuSu** z2g)63##Nzc-PwCS2O`*>~kQ zMX*7?{_DJ)pzBx4Hfp({WC}@(JDSlcHO|pz&he zo?CtVNk*Jwvy^vg9`D)$T>% zOKLpb9Bqx44cY0hk#Q(*kcO0qcP8FjH*z>nRo+o3vS`5@+KA@hHwTePWg@$Ak(o#{ z^~Z(XM@zG5?k$^^0WNaQ#Ec_WAhLiH=N17bIR3!r&QIky*~TTUh?c_shi9YTAPYO? 
z)v8iNw&j8zdEkf|fq^%)eSdC7ZQm$3|1596aKq@EN*5@(Sd9rx1_Z6u=On%^! zWEr#naF*%uFbAm*fDLR$W|BB#CSgw`I81r zw%${q;Af-N`yQlTTm|#=OuQyMew?IW)o%Xrny}#y2^~1`0OxVi1vL5o^B`hd-k0tL z_euGSMsJ&T-(@We=QRjMH-Dsyl_abuQLv4>F#cPTUn^;s0huDvnrqWC9l4@JwiXwD zyi%bc9}g}zNEchfA2vJXLNg)bfv(eKssZqT!-lv!cSlS6ou8vN_V_7gFxe40XL#Rf zWW#I~Jg{*OTvtVCH~2_3efcHnTBR(IDgU^i)G#JCVc^f)jM>)%w4YDn6vO*4c1q3`8*Gg5{BA^Kff#>V=_oLr$=>}w^=*Ei*w zns%9j{U6~3i8^@O1U#~m@L(2lVt&sdCoK2~X*1`m=V*uj(fO;1*z5tAtacIJTmOo|p@_c0FOZ93#7viF zZsugE-uMFZG;8;a)-=WBVZ`5v321ksMI?8&L!4iCGmt{#x2S&Qqk+Xl^G9~!32fVG?F>Se+ zYTB4o*JM(WMbg4KlHm&v)!lDP-!y5e-%1e`#FOxRbNDUWysN5w zPhoL@%rD;Wla!EV!&IcjkAY&$&Us+W^`tao*Aqaf;^X4!;7(Sm^{ZiQPsn^##PZ;J zTYdo==AnC`|IAZWDxtg4z%}?1&R)W*7SdGp1hAM@L89;y-UX&rleeC<`P&(Mq}cnY|*2I_|mgc1L4ayw2W(h`=$jqn@b9?^%#uyMfgBCy1BPtOJ6 zT&ALYt{^C>c78rj8J)bF?6(&dI2oy}xVt{*`;&U~?v|dEJ^pYE-7cXL zy&p*pBAtMS$9f}^cz9j8zwqHqK2s=F2|=-P{3L-FhMhmksx^XLp-)pOA6WF(>9uZ73`D9=R)Qm(?SM3H z!*UFI4VxsYV~!nWhmWZ*7S2A__qR%V>93$MSIarN{}J9-RQtyIbIqf*cSrsb2bBfr z!+w*>$>TrRB)*W}!UEw~>b0T;na??;C!yXFtF_>mr(cro!e6i#lpMQ^h`OI~^N61t$ z<9?i09JpvX(Jg&*3{_WLcZwpGvIE17O-xWE!+}k{OSn#_cLV6~KPET*UmsSw)H}~Y zihB;sbYvIj7mQDgNe{@TvBRG?gXzb+-^T2A1(Pl3LMFWiIGK1mtt@r3?kPy>vH3oj z(Dy$v((uL+dfZ+u&SfiBLdq`ljbHS;PzpBQjjc6@bYl%l=NuOjqAtG`a==CMQm3-( zrsK>T3$d9^alNSVg&*a9auf|CCs@zgDt$=G18YYK^^>VRpo-7B2S?_8VaPp+wd_E6 z$88LXEb$vINCaS0Ccxt5vj}9due&$JwTe>AcV5!SEsHH)ji|+jEEzyx*rzuX{_Mj0u<3F*z@ypew3SW~(E4^XXo#eF9FG15pJvXUP3Tg087us-I9C%8S<@HS72%#Mds|$t{jq-qc#FdT9;8xH(uR*s>840~v z{}*_ehsWsk->GpUG&nF!5JVDGW}#L@AW~gW34=m=|2u?k2N66$3V_KTf&NeO{~R3g z`+8OmwbK%tRXaa=%DN_iwJ2`urWqn^OxWe34ml2*_jLQ-bqalw^^ej0sy}$FX4A`7 zAQZ++VTm8NzNAbxlw+ia&3GpL7-@$1`uS~u%-ZbM!lAsN_=kk85ML9pueBK{WaQ#L?-v?G8w9+DT`b;L2N9f(5=SDE4|`}gf)Xqi4QiIFTq{vuG`D(VrC#Yp5k%?Tf8ox6X7k1V_k+0D z|LByvDxhdVDpCLSpHOvU1a5?Y%ssHJYK_ptT_Y@Sn=!ObUwFR`fIsyQ8$yd0LH8^B!fq(&IgeCZlQsOIf3C5q=V0JW62b3@0 zC<$=SOEH3vilq*Mz2ph3!pOynu12`kAD}UZ>kz?&+r?46oZ+6g<~eI$(8bhohWEw# zPfBeRLDGD(7Z{o?^EH9@;RMXPz9&cMzT~_^E9BrWQL80HVr}vOrudM9?*1lU-z^LB 
zEl!Eyz&K&UE56OO@sd5P9iT+Fe3~u=4`C++MY*^(;MGLpY!eb0`4_#NiS++5Xw9MC z5Lv}A_#&TSx=|FCjM-C8t$;Iqm8>ms%0fQ>m+i$U6;cNRDG^32mhFMnZFXZFM4IFL zqI|kUJ_rq}6S{v-AIg12H{XXV9Oo?~-S^Vx0D>V zX*w0021i2Ai|Ln|DW(o=IG z8g4$#rWVoRClaNr7G-P zhPocR0m#Zq+C8>)(eTq#B@ebY5gjijGa4A^0>}QbJ$q@hHQ|aA&3bP1(`AtFr%Z$> z@oo1zA8}M{zh#3Re!s~wdf0L8JnKed>&>30hQQA|oQyFGotpI|#@c~>Zy%bl=~#pc z5?``7sLVgmwlR@<*!y58@5_CQg~~+eGkCYLmO{BeKgMp}1ERhyaYiMagKgE9L(Di>>-G96s5zCb z{Hn(=PYF|HOT~4jJx;1e`FnLa_EJowAs&&ZfXPtaNnS#)w5L%B+RS>N` z!PWUOScj*(KexO?ce48`D(Wv4|J6$GX|n>O%I%HD2$>xkBPEU$^~{?y7o)|b1T*v^ zp1ggG#26TWZWhnv-XSf*%dK2ISn((MJDc4!gMGjk^mE=q!nP-S*`)X314Ay`YCT2f zqsItVwJ6kN^it55S9Qu5G}S*rfXKe}ZQOs{{8D6p>)0Q$<#Yc^*V`B+*8*QEb?W%{^$)*1rXUpeNdD!k=&(T zH#_xX=){~nRkoy8r*U^zy9sx=T~yc^E0g^!KgX9yo~OesD7l5xmRNlW>G6q>qy-g= z0n64$2O=y!!AAoh7-oMCXb1nL3eGl)5YSF-BudDF%RlQ6Zq2uyjduP0$a#U33WeSK zX|L=hy+Fy!0}IcJcZMWW=4zzh;*NCHaEnHd<8lG6|r z29R{d@Bebn%USC_+`H~eukNZ{`?qUX_3G-{p;Vy`%=O%&O}(DXyIiP{!z~|ljN({0 zTxlNyR|?95!#Ldqgt@@f_J2JU6)XXZBDT5+D*L`vlQIM#J&VG;LkRmAe2e%=@(qm; zUIS&vh%hMU8?J(iE8MPHXfiSM9j~j|i$Caz)9}{D0c%qpPZ^uU!b+F&fgQZ#Oh5p; zK)F^tjU3dXr&Rg@<=3`s+{gN>fCb${^<=~bT9q({=!)wNC@u8L15eWSTqM$;b?$uO z`7~B#*SJyH_ODjO9p*P`Lnj3qau?aKXSfM6S1xH)LMIRWW<$5d82PEAQKU6yx{}th zQ-o`nl|Do@o=on4NX&OJiE0o(vyZ-P;&Jx94j+A;Yo-vaTcFxtRlis`XX@H7I93&% zJB~Ou13NsJOHJ^Z<2WXOOMil6ClHoOo?Xf{I$IfD{O>5+J^@-=n3pv=v;4xNIL%ax z#Zr4fVH2Zcfo{`k!ioNKT&iruxeXxp_ua7tu2`n#G%9(HILNtj6V4g`P9?)H2dahS^wVL+l>0Z>C7@=u^=T6qqa zM=Pk&H<6_bqm1qXUnuMc(E&!1OHp#@WCXi;V8Cx#zmo!9GgwB;0n`vsqu1F0?Cg!NWYe@Qw!im0GV)sL z+~3#R`>{Z3YlqI+;r`Yyi!#JtuX$FU}np2 ztHUzOE6n?i*^HDU#jAd2KYmzhv5cr+XfiEmya5WYP$;$M6x#ANYx13u4?xMmE-i(?F9Y+)uz&%*|P z_Ge91v2-*qL`wSLrv&l}m?KB=xk1nI5pfKSTB^4SKBA3l4U$7g0a?sqs4h+!CZO zDB`bA8Z~Iho?$@(gDvDEPGyV7dDsMXDvVW(%#uV`Ye_ zy;qF=ghXAO+->t&iXT35aHdXZY9z=I@)Z z4g7Ch=#c?fGMvg0R^Aq9-KyU{^0kP7z*zD)mb}&De3=1)H-fi*_-BqM7&Vx~fV`sL zIyle}e6O5Az2pls#e0(bc<=|k#M>ZEUvBgpxY((`+o{K0`pse1C8=xwgB<$;V}V0+ zPiyh>@CQnY1;6`GO7Qc&BQW?h9y$YsH6RKduvv~)U-2eEs<$jai_-wAjSPu;VGd?K 
zFT|JWNgi0`#UUDyZ8g(1?+FN==3p>L+z`y;_FOui)A0lY&gAn`rAxog05 zdKzDb=wNUdUJ)iDI1r)i$*#dc`~a+WnDHUr6Pb+Q#@Y$JFOEt+AF&t@Yn4J7;`vrj zC$x&tVo(?qaU#cS2fRB(*ydgNO{&m23}!+2M7|TGf**jFz=#;}c(#2*g9q?oqfOm` z^$9_6@1q>dPoHWW>a#$o;jG}ll4N5P=C%b4c30-%rx`)~ko49Ti=UbfL18U59t|WG zc)93Xm~MIDqY1^oDJ9L+DiQ>jMye%az7rR;B0OVF%?C}%2#i!Hnnegm^5pw+pBA5DW&hHw}|__x&XxpquGqjCrN(g-R3G!jgT$ zXl7#I+z1%VLmxRZg6cs{p+=DA4ZAP~=Xf*6TLg4dLeHr@v&C{!$1s^l9+(EKuF@k7 z;RrM|bk;1mo9|S^hG9o2391jx>;y(UAFgLa^Kz6E*eKeg?`tRziY$kAsowAJosU{3 zTfT+6*y_yvVJhc0^h7@DzLa*#Z8-_1L7c+OX|9R>u)7b2flFK)b?^92!`uR7TDtC3 zM;N@uXe3NX4UM<>Bswi#EP=^1FM}zPoq@wt~!2@ zuz+rssy4U?!-f7E4tqK`s);nh#6ZawLdllzkt;Uw8S%vw`YXV%GDFtx8Bqmr<6F=n zG!c2y)2fExeav4KBnQ|hK@M5MeWwt7DB$*noY&|{OrZl2VaNl%WSY?!TQf|U2Hz;9 z>a@626%G34DB{*2ZA$Ih28lVC*U_+Hq#<9+NViG%fzqtJV8v0i+abT=jw&8TRyOIH zme3OOmz!i6^vb9OgqO<=37Zss5${OvI@Kb9;>I)RV(GfDJcKTwlYPma7K+4W>h>oL1jJnwX=WtfrjSG(DD6*<4} zk$g?mGQOg9{rgnwDcQQXVWf!(?ft!((>K@(v09nk1|wx5Udq$shT_bpZlr$!JFJ6z z%>e0^h~X>Ej2OG7(gj__GDdmsZ!^u(y@-Am!@FDOtBxlJk*x$KY)5k~IY;l84-nCh z7sy{D{ryai9#YxWf0# z&00p+Nb12D8|iWJ&z=Ej-`Zn`!DcJ3y7~;GSeo1uFkwENC9#P=KvC2pyfI2T? 
zqn4i5EREXZq@-UGgypPt&vmYnzaw@w!hqPB2BYb7wiBTd{B#(t&$VCL!^AjTPUt!C zHCqkcYg%rcJA1Htdb1!FcgUv5txSlK?46q5XTjVimkMmBQ!0p$l{2MN86PPdgZs@~ zJGP4u8_AQBNIZEe6DRX>a)32JF8aJQQ-agk>O9(F=W_W^AAaqc$@N;fkE~cb-Av!(!h6eJ0GJ6+m?0?5Hz>F^eHxFmds+r{}P!= z_PTig^n9YJxKnOtaj&4uK^}H_UuraR<;?sg^U7q`l~_>L#wgj5+xk{F&S|}#Ha}_T zj7l;l3x%6^HZEK@T!qVd1-aQ?lnu{3%;s-(<=yLe|9HyDKdb{mHXk z8CfqFX9mgX8icNj-g5;;kA%fAN4_Cw*z6 zwDRlqmA@>nS2|8UMp!eL-a*T(Q#jB&TT4h;j}iGD;m%nk%Kx2*!&$NGY8XDdi;T}P zYt&yRC%I8W^wge>@N2MM&f0@RWw+ap#yKt(PKj$XszT*H4HxU{x-S%Kmqo0`9(0|z zZCrN!+2Vd?0gjnj4Pax*UE23AF6f;~6stCN_ABPe{=uPOkCNIc3<$GZKn&M<%!PLJ z#Z$isaqUkc?|O+_pm(e#cA_{TPNMfa1X)o3B|hk2!-n+Bkg@jxGj380B6ox ze$%{3sXkvklkIWdeEBFc(IxS0-Jmf=FVWChpt96tj=D>i;f7M&+i=z*76noMT>RL#)o?IQbk6p_=0EdkV?an+}SuMb6`)b?23|a9Fy) zi)u9NV~cbA{Uia=2gJrGo+W0_h_|akSn@l@*7)6(`UxQdN$0;VJRM9!5)nRep&Ymn zpYM$dOQ9Icj%ZrbdCu}kuL%Va>k_)U-}R>nX*(8liGTWp-yTapt8S4uqzTnOv6A3ExPTazfn=4HBo@Avm; ze@sq=%`M5{bT>_!(Mi(x8TSZlVtaj7Gk70LjXc6zN<^){pPlL_WdcymEdEcw)2tfzEm$~O>TmuXsHNPEShkiR zpP{Mm<@|X6TUO-YvTuZcwBb7FnY7H-uWR*-^P+>wLDM)(Cc}`8$E#74#tQz9H>muK zFPktC3~tv4LTEj1h6)Z>dF78%%cZdQ*iIudd#Kt%v(uYjt9)@?yxup$360guuNs$Z zxU-6$eM1S662?tXJfshfljvy}WZntuuPTZP2)oggO)XHa!9|`aLa+>53qyO)`@euy zm;r`L=4M26J@^SA94|yEcnF(mTK4Vj=b%5huUBp6VoeoA^FWtDcf+QgoJeo8sZAw zB@||BTkHsZObl@oSSrxL-Fdjq^1UrLYhrSHm}U>I6im@9L}6axUFgOxPxFw_{WftG z6PbDli<_FQ3j0a(41SQi>*^GVMwKX2XFRjWQXezJ118t~ec~v+8GIpL*yZVA6od)aN_uCwBVtl5 ztVVh%K+##y6>a`Vt;c9kNy6Nzm{)G^Sq4%Wk>~=gb67ya!Q6Bna-t!v%yB+!(*B^6 z^fRY6+__J#=kM%xpALi5SCfB=y0`+C`nNz;mLEZ^2f-I>I^+ZWVcI&xeP`5m=2^_l z`YTly1F9gi{2jpXK-4I9_sg>PK||*XZ)oI(-G292k`cY6G9$Ciz-A>aXC5izxo-wT z(4X-9sAU^+i-HbK<4);0+qi@IY@*1fN22Ex+TP}Fj@CS<*a?qZt zpU;h!@GL?gw0cgt=Y#vzq|D(fzOXueG4NB+XFWKS#fd^+{g?nvi+k8w;iM9f$Mp@e zG!W>N_;XzOokRQG=|Go}*kKp9^X5|{t_F0tfw>J_Y(e}K6~2R_k@pwxHk=I8sUaLe zNjUxHSK>pHvWHEIpq1yuN$9erGj!O)W<+c#*O{LIX{D(^IMtE_{)w!~bGpH!XQ9|X zoXT83d#PKAh4Ul6vct9x=vN}#kS95$aUGZ2>Oy796_MvEe?2tY_X8 zjgnV;UhM|9&e1kbTjoi)t*|T1R$v@q5p~p1L}8@%OXl)UOhD+sClRIF%I+q7AUrfk 
zt(s|Of3o~l*$+4;sXcvy5@*r?qVTd0QHanzK{(}c@&rlxUhVvHYorZtNs~_ll+OPN zVCXKu(z1tn(9qgkO><58A36MZlu8d0tMazSh!69R_uyYIiygRz=`uW?WZZfcG!D`W z`h(a5F>PW`4m-n&9~5@~?@GD0Ks)Ho=@k-OkaC7N*1jF5dCb5(AxGPszi>P<6??C} z2bQaNBa8kd;9afz!>!yG+$$D?+MncXT)I!F^%?C(GV=;YAN4oV60phs|Ni3n_`=}n z^Q>Xlg}}I=yaw((P2?yxj|;pUmw7`Z6j14~f6+>To?NTAq?((o>pk%HY=&M7&EB&a zB2S_=lBCd+Rmjk=VZTtX#de&@i&H*lcXi=DkKZ*KnUhB5eK<56Mzq1+IgfMgaX)4E zK(wVE7-0R!w{o%7PJ?<3>hp#L6Vm$$91BzOU$O@Ai8S}N0_XZ>e+*qVnm>0hh??7W z7SbqLx$`P?@w){jSIJ~9)49&i`1T*2@sF=;eqCNbY|K&34rUL2Vzj>+N0d!l*BMHO zAJG@cH+f39J=N^2@^dYo`e9$$^06;kIbl{JFtT?~OLYk$Lxepa zd$pr-V5!iTv?}UkM*dHNY+`NLSJKIn6nw^G*gS$`$a0}V<6d5c7K7s@ijn$Iio``(eNqB1n7j&;bRAjA^7umsP3E#mS&`bU$#_4*GjZq zh2Zjfmg3g1C(U!wfFsj1N5OWx+`Z-V>TrjO9*AVd=?vmTI`MN)_jjCtaAMZJv5iDs zZxaB~x?zw%LUpF0`(PP@G-PVLx_m$C}5vMx5W z8z^o5q`npO*3;WulGGW?BLPBV829`?F2Ooo*nBlPW1%wPAX1n{7gL_4I^L)^*&L67(rs`ObF_7+N^sp%; zj!W0rd(8(5j(xWNhUK12`g2v^AA@;}KUHfhq`IOY(+^v45;c7Gwh#c=j~26&ZgfLa zSnc&960XP37<7GFi6GXxWAM@gMk(`H z0bqCiA^`F@z5W2Gyt?)xDWoh>Cw9F!v$`&6vn#1oWLpQ_k8DBh#iJXN+3jB)>Cj1| zDIViL5s$Hw#k8~0&gz{dJovOfDQ7$*<4-?(gO_Gs&PR;-83XWmGHt6R#KV8snB$<` z`B61y8+=~+*mu59LiO3d2ul1}(5pKKhV>n}UB!#=~a)PiUj8^@br!pQKi9=6i}) zAfPA+{&?lLLbBwUG&5!AJ6@n|78@rllT*WM1-Ta z_%Tys)cuQ+PdmUTm`8tf-|w%(jo(fvkGGiMh^95ysp{Rb}g5h&;Z}c2hmwEOfJUu#w z5xUC?2F=zOF@*s&$FxqEzf^L6@^V*@&za{PNtc4}!N^2FWZodRAS2mmeI6 z;$W&1_AiE{m`CE*?trrBYL+7NMa6U2>_Vo=EAMjm zi!zJr#a~{hi8z1b-3!auDJNTIN!EFr{MZhAHn5h8*TN$%>?3hzC4IkgVAS^!c!9E1 z@<0NPI1E_19nMPjB5IH7G5idaa>LRg(6T+?#DLCxL7t;v4=+!d$0osGhdn zkL&3_Z}#i|k1rVcid}M}_8e7=m)A@f1+yNx*ph`KZXp+0c0ew;k*CQQQ;LuEJ6qqV5=8@PHstz2eySe()W?$8;oJ* zg_0QXkOhb)Gd9sN%uRW~ty2zfmm&&7tw;QyjNX0(`yIGesn@@UiamGJjHS7TZxRkyB#+}b<4s&(>7@0eZ ziZFD7T7-1Or`_GPoSIw!KNiLGTxCdcE|53n!4bnxFjKt_A^UTvX8&y&u*x_%rAXw@ zpp+ghY?!eg*|1PbEm;&Gp;6*j3VsJ>(i^qpI-lD*e2mXpxj614sAan}-1*J3ZRiC} z{F5&{qlQmk`U)xCQ#Yh6M;ZZGQH;7Fv+Z*oaOI;vQMLl~_2N2u+@aNR$EFTPz@z|S z3YWjjU*8z<5lzSYL|Hb~d9J>Orx&Ys6zd3PRcJZhRl}3cL49F8KF$4)7AXdfXjbN1 
zjh;ePNNb9N$kGcCt&ScdryX5G0k(8>vhBS98D>%CnFHEgcFg9f)J6GF__s5IeJ*iV zK!>Our;kMa4dm#71-SU;;m<_y+If+I-s{$M72^V+uh^kTa%rMbyavn67l&p<0#`(dNS-6$ZM?& zN)H9L+(!CXy1Wygbe4=g$A&G6mi?i9H&d_79F3=!>sZl!2dB3u{IXcD_P0Y18auxQ zDBsm7TlVt1pe?cotn_vc=wGd%ti`ROzMeUi3hrRMvSVK&_gjB`n~OpX6zbl(ZrvZH zk6Fn6`6P4+BOgI&d%qNw3I=_MNv z>0p$qaDWJ0U=9-CG?)jSy^1d(st1*IV)$wIWFS&djPgu6;RiBsMdh{_>e2q_O>J3`AuM)3C(wMwBbWdpmASMP#yXieZn+j&utp zD@P38v@ER14)>FXz!txy7OuK$z!oO_+OBLrM#JjUB1wG1-j;d560jw`mf3#5AcRRR ztyR3|sb$PtdsvliQ@4MyIbiszmlj5Zx4ExA3uDoPuR7*VU7 znc0_B^&7E4pPzK}UkPMR{idGW9)PV;u8zt9>?AO#XXWLTv{-K<;X|9_mk@2=?C&r! zKdN`{&`hpQvoY!nT_OQOgB$*Z8;B26C8O=U(zG;Sz7`-|>W$-|tqWvOA#CZiz;Oq( zE?$B7n;X_S39jRvA7n$MJn8r}@#~HZ*PlguSgOs1ay{+MF^qLX8w*SKbI<;qo(0$I;erLU?I3=?1T3<* zm;J$he;+QqQ5^_F%u6@_X0!9WJij*~3AU4Y+lu)8V~-j{cEc9>2h6fNS+cP3)*)js zYjeIYLutG9Q~Yh$C4<7w>MdW{ya_Qe#T;hiCZ}Cbfs=TsilI;eNF%&{sMs4yw@w-^Pqq^8*2#WvAVF&x^64MZ1(2C$1Xmlbs4n|?j&0eMQ zSL9dmDlB?$*QYn2JVx8pKj*zUnPS@B%?4NM=XCGuy1zjD*w>XJ^>mwX61e_56ndP} zPI$8Eo9$9hn<5WbRHquhS~$n_p|)wDxlx|p9^kW~iCFw5PmKOhjeabV?6J?$7M z>iBw)N|>}N1l23EI{Kw_M;1}p7s=2KVA};G$AaWqFy|Z))hKj z^L4mkO_$;P_S>QWB9t6p!Q`^x3tT?(r~$NDqIO5kW^J>Lx)LB|a2^ zgca{mK_!FtyPPTX conversion, the text is rendered as Text where you can select/update it. Please note that in order to convert PDF files to PPTX format, Aspose.PDF provides a class named [PptxSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/pptxsaveoptions/). An object of the PptxSaveOptions class is passed as a second argument to the [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods). The following code snippet shows the process for converting PDF files into PPTX format. +During PDF to PPTX conversion, the text is rendered as Text where you can select/update it. Please note that in order to convert PDF files to PPTX format, Aspose.PDF provides a class named [PptxSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/pptxsaveoptions/). 
An object of the PptxSaveOptions class is passed as a second argument to the [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods). The following code snippet shows the process for converting PDF files into PPTX format. ## Simple conversion PDF to PowerPoint using Python and Aspose.PDF for Python via .NET In order to convert PDF to PPTX, Aspose.PDF for Python advice to use the following code steps. -Steps: Convert PDF to PowerPoint in Python | Steps: Convert PDF to PPTX in Python +Steps: Convert PDF to PowerPoint in Python 1. Create an instance of [Document](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/) class. 1. Create an instance of [PptxSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/pptxsaveoptions/) class. @@ -67,7 +52,7 @@ In order to convert PDF to PPTX, Aspose.PDF for Python advice to use the followi {{% alert color="success" %}} **Try to convert PDF to PowerPoint online** -Aspose.PDF for Python via .NET presents you online free application ["PDF to PPTX"](https://products.aspose.app/pdf/conversion/pdf-to-pptx), where you may try to investigate the functionality and quality it works. +Aspose.PDF for Python via .NET presents you online free application ["PDF to PPTX"](https://products.aspose.app/pdf/conversion/pdf-to-pptx), where you may try to investigate the functionality and quality it works. [![Aspose.PDF Convertion PDF to PPTX with Free App](pdf_to_pptx.png)](https://products.aspose.app/pdf/conversion/pdf-to-pptx) {{% /alert %}} @@ -95,7 +80,7 @@ In case if you need to convert a searchable PDF to PPTX as images instead of sel This method converts a PDF document into a PowerPoint (PPTX) file while setting a custom image resolution (300 DPI) for improved quality. -1. Load the PDF into an 'apdf.Document' object. +1. Load the PDF into an 'ap.Document' object. 1. Create a 'PptxSaveOptions' instance. 1. Set the 'image_resolution' property to 300 DPI for high-quality rendering. 1. 
Save the PDF as a PPTX file using the defined save options. @@ -116,27 +101,3 @@ This method converts a PDF document into a PowerPoint (PPTX) file while setting print(infile + " converted into " + outfile) ``` - -## See Also - -This article also covers these topics. The codes are same as above. - -_Format_: **PowerPoint** -- [Python PDF to PowerPoint Code](#python-pdf-to-powerpoint) -- [Python PDF to PowerPoint API](#python-pdf-to-powerpoint) -- [Python PDF to PowerPoint Programmatically](#python-pdf-to-powerpoint) -- [Python PDF to PowerPoint Library](#python-pdf-to-powerpoint) -- [Python Save PDF as PowerPoint](#python-pdf-to-powerpoint) -- [Python Generate PowerPoint from PDF](#python-pdf-to-powerpoint) -- [Python Create PowerPoint from PDF](#python-pdf-to-powerpoint) -- [Python PDF to PowerPoint Converter](#python-pdf-to-powerpoint) - -_Format_: **PPTX** -- [Python PDF to PPTX Code](#python-pdf-to-pptx) -- [Python PDF to PPTX API](#python-pdf-to-pptx) -- [Python PDF to PPTX Programmatically](#python-pdf-to-pptx) -- [Python PDF to PPTX Library](#python-pdf-to-pptx) -- [Python Save PDF as PPTX](#python-pdf-to-pptx) -- [Python Generate PPTX from PDF](#python-pdf-to-pptx) -- [Python Create PPTX from PDF](#python-pdf-to-pptx) -- [Python PDF to PPTX Converter](#python-pdf-to-pptx) diff --git a/en/python-net/converting/convert-pdf-to-word/_index.md b/en/python-net/converting/convert-pdf-to-word/_index.md index daef56eb4..7b5bb6317 100644 --- a/en/python-net/converting/convert-pdf-to-word/_index.md +++ b/en/python-net/converting/convert-pdf-to-word/_index.md @@ -14,34 +14,15 @@ AlternativeHeadline: How to Convert PDF to Word in Python Abstract: This article provides a comprehensive guide on converting PDF files to Microsoft Word formats (DOC and DOCX) using Python, specifically utilizing the Aspose.PDF library. It outlines the advantages of converting PDFs to editable Word documents, enabling easier content manipulation such as text, tables, and images. 
The article details the process of converting PDF to DOC (Word 97-2003 format) and DOCX, with code snippets demonstrating these conversions through Python. The process involves creating a `Document` object from the PDF and saving it in the desired format using the `save()` method and the `SaveFormat` enumeration. Additionally, it introduces the `DocSaveOptions` class, which allows further customization of the conversion process, such as specifying recognition modes. The article also highlights online applications provided by Aspose.PDF for testing the conversion quality and functionality. The content includes a structured overview and links to corresponding sections for each format. --- -## Overview - -This article explains how to **convert PDF to Microsoft Word Documents using Python**. It covers these topics. - -_Format_: **DOC** -- [Python PDF to DOC](#python-pdf-to-doc) -- [Python Convert PDF to DOC](#python-pdf-to-doc) -- [Python How to convert PDF file to DOC](#python-pdf-to-doc) - -_Format_: **DOCX** -- [Python PDF to DOCX](#python-pdf-to-docx) -- [Python Convert PDF to DOCX](#python-pdf-to-docx) -- [Python How to convert PDF file to DOCX](#python-pdf-to-docx) - -_Format_: **Word** -- [Python PDF to Word](#python-pdf-to-docx) -- [Python Convert PDF to Word](#python-pdf-to-doc) -- [Python How to convert PDF file to Word](#python-pdf-to-docx) - ## Convert PDF to DOC One of the most popular features is the PDF to Microsoft Word DOC conversion, which makes content management easier. **Aspose.PDF for Python via .NET** allows you to convert PDF files not only to DOC but also to DOCX format, easily and efficiently. The [DocSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/docsaveoptions/) class provides numerous properties that improve the process of converting PDF files to DOC format. Among these properties, Mode enables you to specify the recognition mode for PDF content. 
You can specify any value from the RecognitionMode enumeration for this property. Each of these values has specific benefits and limitations: -Steps: Convert PDF to DOC in Python +Steps: Convert PDF to DOC in Python -1. Load the PDF into an 'apdf.Document' object. +1. Load the PDF into an 'ap.Document' object. 1. Create a 'DocSaveOptions' instance. 1. Set the format property to 'DocFormat.DOC' to ensure the output is in .doc format (older Word format). 1. Save the PDF as a Word document using the specified save options. @@ -77,9 +58,9 @@ Aspose.PDF for Python API lets you read and convert PDF documents to DOCX using The following Python code snippet shows the process of converting a PDF file into DOCX format. -Steps: Convert PDF to DOCX in Python +Steps: Convert PDF to DOCX in Python -1. Load the source PDF using 'apdf.Document'. +1. Load the source PDF using 'ap.Document'. 1. Create an instance of 'DocSaveOptions'. 1. Set the format property to 'DocFormat.DOC_X' to generate a .docx file (modern Word format). 1. Save the PDF as a DOCX file with the configured save options. @@ -111,37 +92,3 @@ Aspose.PDF for Python presents you online free application ["PDF to Word"](https [![Aspose.PDF Convertion PDF to Word Free App](/pdf/net/images/pdf_to_word.png)](https://products.aspose.app/pdf/conversion/pdf-to-docx) {{% /alert %}} - -## See Also - -This article also covers these topics. The codes are same as above. 
- -_Format_: **Word** -- [Python PDF to Word Code](#python-pdf-to-docx) -- [Python PDF to Word API](#python-pdf-to-docx) -- [Python PDF to Word Programmatically](#python-pdf-to-docx) -- [Python PDF to Word Library](#python-pdf-to-docx) -- [Python Save PDF as Word](#python-pdf-to-docx) -- [Python Generate Word from PDF](#python-pdf-to-docx) -- [Python Create Word from PDF](#python-pdf-to-docx) -- [Python PDF to Word Converter](#python-pdf-to-docx) - -_Format_: **DOC** -- [Python PDF to DOC Code](#python-pdf-to-doc) -- [Python PDF to DOC API](#python-pdf-to-doc) -- [Python PDF to DOC Programmatically](#python-pdf-to-doc) -- [Python PDF to DOC Library](#python-pdf-to-doc) -- [Python Save PDF as DOC](#python-pdf-to-doc) -- [Python Generate DOC from PDF](#python-pdf-to-doc) -- [Python Create DOC from PDF](#python-pdf-to-doc) -- [Python PDF to DOC Converter](#python-pdf-to-doc) - -_Format_: **DOCX** -- [Python PDF to DOCX Code](#python-pdf-to-docx) -- [Python PDF to DOCX API](#python-pdf-to-docx) -- [Python PDF to DOCX Programmatically](#python-pdf-to-docx) -- [Python PDF to DOCX Library](#python-pdf-to-docx) -- [Python Save PDF as DOCX](#python-pdf-to-docx) -- [Python Generate DOCX from PDF](#python-pdf-to-docx) -- [Python Create DOCX from PDF](#python-pdf-to-docx) -- [Python PDF to DOCX Converter](#python-pdf-to-docx) From f5b66a578bdd1be1cbb29aa7014f241f4ccacb88 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Mon, 22 Sep 2025 11:00:22 +0300 Subject: [PATCH 47/65] Update extract images from PDF documentation for clarity and detail --- .../extract-images-from-pdf-file/_index.md | 74 +++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md index e3efb498d..5a6453333 100644 --- 
a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md +++ b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md @@ -73,4 +73,78 @@ This method extracts images located within a specified rectangular region on a P with FileIO(path_outfile.replace("index", str(index)), "w") as output_image: image_placement.image.save(output_image) index = index + 1 +``` + +## Extract Image Information from PDF + +This example demonstrates how to analyze images embedded in a PDF page and calculate their effective resolution. + +1. Open the PDF with 'ap.Document'. +1. Track graphics state while reading page content. +1. Handle operators: + - 'GSave'/'GRestore' - push/pop matrix. + - 'ConcatenateMatrix' - update transform. + - 'Do' - if it’s an image, get size & apply transform. +1. Calculate effective DPI. +1. Print image name, scaled size, and DPI. + +```python + + import aspose.pdf as ap + from io import FileIO + from os import path + + path_infile = path.join(self.data_dir, infile) + + document = ap.Document(path_infile) + + default_resolution = 72 + graphics_state = [] + + image_names = list(document.pages[1].resources.images.names) + + graphics_state.append(drawing.drawing2d.Matrix(float(1), float(0), float(0), float(1), float(0), float(0))) + + for op in document.pages[1].contents: + if is_assignable(op, ap.operators.GSave): + graphics_state.append(cast(drawing.drawing2d.Matrix, graphics_state[-1]).clone()) + + elif is_assignable(op, ap.operators.GRestore): + graphics_state.pop() + + elif is_assignable(op, ap.operators.ConcatenateMatrix): + opCM = cast(ap.operators.ConcatenateMatrix, op) + cm = drawing.drawing2d.Matrix( + float(opCM.matrix.a), + float(opCM.matrix.b), + float(opCM.matrix.c), + float(opCM.matrix.d), + float(opCM.matrix.e), + float(opCM.matrix.f), + ) + + graphics_state[-1].multiply(cm) + continue + + elif is_assignable(op, ap.operators.Do): + opDo = cast(ap.operators.Do, op) + if 
opDo.name in image_names: + last_ctm = cast(drawing.drawing2d.Matrix, graphics_state[-1]) + index = image_names.index(opDo.name) + 1 + image = document.pages[1].resources.images[index] + + scaled_width = math.sqrt(last_ctm.elements[0] ** 2 + last_ctm.elements[1] ** 2) + scaled_height = math.sqrt(last_ctm.elements[2] ** 2 + last_ctm.elements[3] ** 2) + + original_width = image.width + original_height = image.height + + res_horizontal = original_width * default_resolution / scaled_width + res_vertical = original_height * default_resolution / scaled_height + + print( + f"{self.data_dir}image {opDo.name} " + f"({scaled_width:.2f}:{scaled_height:.2f}): " + f"res {res_horizontal:.2f} x {res_vertical:.2f}" + ) ``` \ No newline at end of file From 991d3a4b599f69d8fb28ce468f4ffd5d88c376f8 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Mon, 22 Sep 2025 11:01:27 +0300 Subject: [PATCH 48/65] Update documentation for converting PDF to Word in Python --- en/python-net/converting/convert-pdf-to-word/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-pdf-to-word/_index.md b/en/python-net/converting/convert-pdf-to-word/_index.md index 7b5bb6317..688de9786 100644 --- a/en/python-net/converting/convert-pdf-to-word/_index.md +++ b/en/python-net/converting/convert-pdf-to-word/_index.md @@ -1,6 +1,6 @@ --- title: Convert PDF to Microsoft Word Documents in Python -linktitle: Convert PDF to Word 2003/2019 +linktitle: Convert PDF to Word type: docs weight: 10 url: /python-net/convert-pdf-to-word/ From d978e65d870d92cd7f5b7a6161067c5a1592c2c8 Mon Sep 17 00:00:00 2001 From: Anastasiia Holub Date: Mon, 22 Sep 2025 15:46:03 +0300 Subject: [PATCH 49/65] Update DICOM conversion instructions and code snippets in documentation --- .../convert-images-format-to-pdf/_index.md | 30 ++++--------------- 1 file changed, 5 insertions(+), 25 deletions(-) diff --git 
a/en/python-net/converting/convert-images-format-to-pdf/_index.md b/en/python-net/converting/convert-images-format-to-pdf/_index.md index f74e8342b..d7aab8985 100644 --- a/en/python-net/converting/convert-images-format-to-pdf/_index.md +++ b/en/python-net/converting/convert-images-format-to-pdf/_index.md @@ -104,32 +104,12 @@ DICOM format is the medical industry standard for the creation, storage, transmi **Aspose.PDF for Python** allows you to convert DICOM and SVG images, but for technical reasons to add images you need to specify the type of file to be added to PDF. -The following code snippet shows how to convert DICOM files to PDF format with Aspose.PDF. You should load DICOM image, place the image on a page in a PDF file and save the output as PDF. +The following code snippet shows how to convert DICOM files to PDF format with Aspose.PDF. You should load DICOM image, place the image on a page in a PDF file and save the output as PDF. We use the additional pydicom library to set the dimensions of this image. If you simply want to place the image on the page, you can skip that part of the code. -1. Load the DICOM file. -1. Extract image dimensions. -1. Create a new PDF document. - -мы используем дополнительную библиотеку pydicom чтобы установить размеры данного изображения. Если выхотите прост орасположить изображение на страницу, то данный код можно пропустить (под ) - -```python - - # Load the DICOM file - dicom_file = pydicom.dcmread(path_infile) - - # Get the dimensions of the image - rows = dicom_file.Rows - columns = dicom_file.Columns - - # Print the dimensions - print(f"DICOM image size: {rows} x {columns} pixels") -``` - -1. Prepare the DICOM image for PDF. -1. Set PDF page size and margins. -1. Add the image to the page. -1. Save the PDF. -1. Print Conversion Message. +1. Initialize a new 'ap.Document()' and add a page. +1. Insert DICOM Image. Create an 'ap.Image()', set its type to DICOM, and assign the file path. +1. Adjust Page Size. 
Match the PDF page dimensions to the DICOM image size, remove margins. +1. Add the image to the page, save the document to the output file. ```python From 193801817fbb53fcead8f4eaec5e88353bab88b5 Mon Sep 17 00:00:00 2001 From: Anastasiia Holub Date: Mon, 22 Sep 2025 17:17:54 +0300 Subject: [PATCH 50/65] Add abbreviations for image formats in conversion documentation --- .../convert-images-format-to-pdf/_index.md | 14 +++++++------- .../convert-other-files-to-pdf/_index.md | 4 ++-- .../convert-pdf-to-images-format/_index.md | 6 +----- .../convert-pdf-to-other-files/_index.md | 2 +- 4 files changed, 11 insertions(+), 15 deletions(-) diff --git a/en/python-net/converting/convert-images-format-to-pdf/_index.md b/en/python-net/converting/convert-images-format-to-pdf/_index.md index d7aab8985..47063c79a 100644 --- a/en/python-net/converting/convert-images-format-to-pdf/_index.md +++ b/en/python-net/converting/convert-images-format-to-pdf/_index.md @@ -22,7 +22,7 @@ Abstract: This article provides a comprehensive guide on converting various imag Convert BMP files to PDF document using **Aspose.PDF for Python via .NET** library. -BMP images are Files having extension. BMP represent Bitmap Image files that are used to store bitmap digital images. These images are independent of graphics adapter and are also called device independent bitmap (DIB) file format. +BMPimages are Files having extension. BMP represent Bitmap Image files that are used to store bitmap digital images. These images are independent of graphics adapter and are also called device independent bitmap (DIB) file format. You can convert BMP to PDF files with Aspose.PDF for Python via .NET API. Therefore, you can follow the following steps to convert BMP images: @@ -67,7 +67,7 @@ Aspose presents you online free application ["BMP to PDF"](https://products.aspo Convert a CGM (Computer Graphics Metafile) into PDF (or another supported format) using Aspose.PDF for Python via .NET. 
-CGM is a file extension for a Computer Graphics Metafile format commonly used in CAD (computer-aided design) and presentation graphics applications. CGM is a vector graphics format that supports three different encoding methods: binary (best for program read speed), character-based (produces the smallest file size and allows for faster data transfers) or cleartext encoding (allows users to read and modify the file with a text editor). +CGM is a file extension for a Computer Graphics Metafile format commonly used in CAD (computer-aided design) and presentation graphics applications. CGM is a vector graphics format that supports three different encoding methods: binary (best for program read speed), character-based (produces the smallest file size and allows for faster data transfers) or cleartext encoding (allows users to read and modify the file with a text editor). Check next code snippet for converting CGM files to PDF format. @@ -100,7 +100,7 @@ Steps to Convert CGM to PDF in Python: ## Convert DICOM to PDF -DICOM format is the medical industry standard for the creation, storage, transmission, and visualization of digital medical images and documents of examined patients. +DICOM format is the medical industry standard for the creation, storage, transmission, and visualization of digital medical images and documents of examined patients. **Aspose.PDF for Python** allows you to convert DICOM and SVG images, but for technical reasons to add images you need to specify the type of file to be added to PDF. @@ -166,7 +166,7 @@ Aspose presents you online free application ["DICOM to PDF"](https://products.as ## Convert EMF to PDF -EMF stores graphical images device-independently. Metafiles of EMF comprises of variable-length records in chronological order that can render the stored image after parsing on any output device. +EMF stores graphical images device-independently. 
Metafiles of EMF comprise variable-length records in chronological order that can render the stored image after parsing on any output device. The following code snippet shows how to convert an EMF to PDF with Python: @@ -206,7 +206,7 @@ Aspose presents you online free application ["EMF to PDF"](https://products.aspo Convert GIF files to PDF document using **Aspose.PDF for Python via .NET** library. -GIF is able to store compressed data without loss of quality in a format of no more than 256 colors. The hardware-independent GIF format was developed in 1987 (GIF87a) by CompuServe for transmitting bitmap images over networks. +GIF is able to store compressed data without loss of quality in a format of no more than 256 colors. The hardware-independent GIF format was developed in 1987 (GIF87a) by CompuServe for transmitting bitmap images over networks. So the following code snippet follows these steps and shows how to convert BMP to PDF using Python: @@ -244,7 +244,7 @@ Aspose presents you online free application ["GIF to PDF"](https://products.aspo **Aspose.PDF for Python via .NET** support feature to convert PNG images to PDF format. Check the next code snippet for realizing you task. -PNG refers to a type of raster image file format that use loseless compression, that makes it popular among its users. +PNG refers to a type of raster image file format that uses lossless compression, which makes it popular among its users. You can convert PNG to PDF image using the below steps: @@ -291,7 +291,7 @@ Aspose presents you online free application ["PNG to PDF"](https://products.aspo Scalable Vector Graphics (SVG) is a family of specifications of an XML-based file format for two-dimensional vector graphics, both static and dynamic (interactive or animated). The SVG specification is an open standard that has been under development by the World Wide Web Consortium (W3C) since 1999. -SVG images and their behaviors are defined in XML text files.
This means that they can be searched, indexed, scripted, and if required, compressed. As XML files, SVG images can be created and edited with any text editor, but it is often more convenient to create them with drawing programs such as Inkscape. +SVG images and their behaviors are defined in XML text files. This means that they can be searched, indexed, scripted, and if required, compressed. As XML files, SVG images can be created and edited with any text editor, but it is often more convenient to create them with drawing programs such as Inkscape. {{% alert color="success" %}} **Try to convert SVG format to PDF online** diff --git a/en/python-net/converting/convert-other-files-to-pdf/_index.md b/en/python-net/converting/convert-other-files-to-pdf/_index.md index 8e2091a25..cd91ac39d 100644 --- a/en/python-net/converting/convert-other-files-to-pdf/_index.md +++ b/en/python-net/converting/convert-other-files-to-pdf/_index.md @@ -82,7 +82,7 @@ Steps Convert TEX to PDF in Python: EPUB (short for electronic publication) is a free and open e-book standard from the International Digital Publishing Forum (IDPF). Files have the extension .epub. EPUB is designed for reflowable content, meaning that an EPUB reader can optimize text for a particular display device. -EPUB also supports fixed-layout content. The format is intended as a single format that publishers and conversion houses can use in-house, as well as for distribution and sale. It supersedes the Open eBook standard.The version EPUB 3 is also endorsed by the Book Industry Study Group (BISG), a leading book trade association for standardized best practices, research, information and events, for packaging of content. +EPUB also supports fixed-layout content. The format is intended as a single format that publishers and conversion houses can use in-house, as well as for distribution and sale. 
It supersedes the Open eBook standard. The version EPUB 3 is also endorsed by the Book Industry Study Group (BISG), a leading book trade association for standardized best practices, research, information and events, for packaging of content. {{% alert color="success" %}} **Try to convert EPUB to PDF online** @@ -147,7 +147,7 @@ The following code snippet shows how to use this functionality with Aspose.PDF l ## Convert PCL to PDF -PCL (Printer Command Language) is a Hewlett-Packard printer language developed to access standard printer features. PCL levels 1 through 5e/5c are command based languages using control sequences that are processed and interpreted in the order they are received. At a consumer level, PCL data streams are generated by a print driver. PCL output can also be easily generated by custom applications. +PCL (Printer Command Language) is a Hewlett-Packard printer language developed to access standard printer features. PCL levels 1 through 5e/5c are command based languages using control sequences that are processed and interpreted in the order they are received. At a consumer level, PCL data streams are generated by a print driver. PCL output can also be easily generated by custom applications.
{{% alert color="success" %}} **Try to convert PCL to PDF online** diff --git a/en/python-net/converting/convert-pdf-to-images-format/_index.md b/en/python-net/converting/convert-pdf-to-images-format/_index.md index 7a7c4794b..fbcddfe5b 100644 --- a/en/python-net/converting/convert-pdf-to-images-format/_index.md +++ b/en/python-net/converting/convert-pdf-to-images-format/_index.md @@ -101,11 +101,7 @@ Steps: PDF to Image (BMP, EMF, JPG, PNG, GIF) in Python * [JpegDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/jpegdevice/) (to convert PDF to JPG) * [PngDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/pngdevice/) (to convert PDF to PNG) * [GifDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/gifdevice/) (to convert PDF to GIF) -<<<<<<< HEAD -1. Call the [device.process()](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/imagedevice/#methods) method to perform PDF to Image conversion. -======= 3. Call the [ImageDevice.process()](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/imagedevice/#methods) method to perform PDF to Image conversion. ->>>>>>> develop ### Convert PDF to BMP @@ -272,7 +268,7 @@ Aspose.PDF for Python presents you online free application ["PDF to PNG"](https: ## Convert PDF using SaveOptions class -This part of article shows you how to convert PDF to SVG using Python and SaveOptions class. +This part of article shows you how to convert PDF to SVG using Python and SaveOptions class. 
{{% alert color="success" %}} **Try to convert PDF to SVG online** diff --git a/en/python-net/converting/convert-pdf-to-other-files/_index.md b/en/python-net/converting/convert-pdf-to-other-files/_index.md index 2d251531c..6238bf668 100644 --- a/en/python-net/converting/convert-pdf-to-other-files/_index.md +++ b/en/python-net/converting/convert-pdf-to-other-files/_index.md @@ -24,7 +24,7 @@ Aspose.PDF for Python presents you online free application ["PDF to EPUB"](https [![Aspose.PDF Convertion PDF to EPUB with Free App](pdf_to_epub.png)](https://products.aspose.app/pdf/conversion/pdf-to-epub) {{% /alert %}} -**EPUB** is a free and open e-book standard from the International Digital Publishing Forum (IDPF). Files have the extension .epub. +EPUB is a free and open e-book standard from the International Digital Publishing Forum (IDPF). Files have the extension .epub. EPUB is designed for reflowable content, meaning that an EPUB reader can optimize text for a particular display device. EPUB also supports fixed-layout content. The format is intended as a single format that publishers and conversion houses can use in-house, as well as for distribution and sale. It supersedes the Open eBook standard. Aspose.PDF for Python also supports the feature to convert PDF documents to EPUB format. Aspose.PDF for Python has a class named 'EpubSaveOptions' which can be used as the second argument to [document.save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods) method, to generate an EPUB file. 
From 4a15b307b36ba573b9969ccf0d97be17d778364e Mon Sep 17 00:00:00 2001 From: Anastasiia Holub Date: Tue, 30 Sep 2025 11:35:56 +0300 Subject: [PATCH 51/65] Update documentation for converting various file formats to PDF in Python --- en/python-net/converting/convert-html-to-pdf/_index.md | 4 ---- .../converting/convert-other-files-to-pdf/_index.md | 6 +++--- .../converting/convert-pdf-to-images-format/_index.md | 2 +- .../converting/convert-pdf-to-other-files/_index.md | 2 +- 4 files changed, 5 insertions(+), 9 deletions(-) diff --git a/en/python-net/converting/convert-html-to-pdf/_index.md b/en/python-net/converting/convert-html-to-pdf/_index.md index 9ae890d21..9a9377331 100644 --- a/en/python-net/converting/convert-html-to-pdf/_index.md +++ b/en/python-net/converting/convert-html-to-pdf/_index.md @@ -128,11 +128,7 @@ This example shows how to convert an HTML file to PDF while embedding fonts. If print(infile + " converted into " + outfile) ``` -<<<<<<< HEAD ## Render Content on single Page during HTML to PDF conversion -======= -## Render content on single page during HTML to PDF conversion ->>>>>>> develop This example demonstrates how to convert an HTML file into a single-page PDF using Aspose.PDF for Python. You can display all content on one page using the 'is_render_to_single_page property'. diff --git a/en/python-net/converting/convert-other-files-to-pdf/_index.md b/en/python-net/converting/convert-other-files-to-pdf/_index.md index cd91ac39d..7776309ff 100644 --- a/en/python-net/converting/convert-other-files-to-pdf/_index.md +++ b/en/python-net/converting/convert-other-files-to-pdf/_index.md @@ -41,11 +41,11 @@ Steps Convert OFD to PDF in Python: print(infile + " converted into " + outfile) ``` -## Convert LaTeX/TeX to PDF! +## Convert LaTeX/TeX to PDF -The LaTeX file format is a text file format with markup in the LaTeX derivative of the TeX family of languages and LaTeX is a derived format of the TeX system. 
LaTeX (ˈleɪtɛk/lay-tek or lah-tek) is a document preparation system and document markup language. It is widely used for the communication and publication of scientific documents in many fields, including mathematics, physics, and computer science. It also has a prominent role in the preparation and publication of books and articles that contain complex multilingual materials, such as корейкие , японские ,китайские символы +The LaTeX file format is a text file format with markup in the LaTeX derivative of the TeX family of languages and LaTeX is a derived format of the TeX system. LaTeX (ˈleɪtɛk/lay-tek or lah-tek) is a document preparation system and document markup language. It is widely used for the communication and publication of scientific documents in many fields, including mathematics, physics, and computer science. It also plays a key role in the preparation and publication of books and articles containing complex multilingual material, such as Korean, Japanese, Chinese characters, and Arabic, including special editions. - and Arabic, including specific editions. LaTeX uses the TeX typesetting program for formatting its output, and is itself written in the TeX macro language. +LaTeX uses the TeX typesetting program for formatting its output, and is itself written in the TeX macro language. 
{{% alert color="success" %}} **Try to convert LaTeX/TeX to PDF online** diff --git a/en/python-net/converting/convert-pdf-to-images-format/_index.md b/en/python-net/converting/convert-pdf-to-images-format/_index.md index fbcddfe5b..ea8f26a8e 100644 --- a/en/python-net/converting/convert-pdf-to-images-format/_index.md +++ b/en/python-net/converting/convert-pdf-to-images-format/_index.md @@ -101,7 +101,7 @@ Steps: PDF to Image (BMP, EMF, JPG, PNG, GIF) in Python * [JpegDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/jpegdevice/) (to convert PDF to JPG) * [PngDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/pngdevice/) (to convert PDF to PNG) * [GifDevice](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/gifdevice/) (to convert PDF to GIF) -3. Call the [ImageDevice.process()](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/imagedevice/#methods) method to perform PDF to Image conversion. +1. Call the [ImageDevice.process()](https://reference.aspose.com/pdf/python-net/aspose.pdf.devices/imagedevice/#methods) method to perform PDF to Image conversion. ### Convert PDF to BMP diff --git a/en/python-net/converting/convert-pdf-to-other-files/_index.md b/en/python-net/converting/convert-pdf-to-other-files/_index.md index 6238bf668..cc1d6e981 100644 --- a/en/python-net/converting/convert-pdf-to-other-files/_index.md +++ b/en/python-net/converting/convert-pdf-to-other-files/_index.md @@ -143,7 +143,7 @@ The following code snippet shows the process of converting PDF file into XPS for ## Convert PDF to MD -Aspose.PDF has the class 'MarkdownSaveOptions(), which converts a PDF document into Markdown (MD) format while preserving images and resources. +Aspose.PDF has the class 'MarkdownSaveOptions()', which converts a PDF document into Markdown (MD) format while preserving images and resources. 1. Load the source PDF using 'ap.Document'. 1. Create an instance of 'MarkdownSaveOptions'. 
From de9fac38238ce5cfba7fd3a5fca7d9bda335e7be Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Wed, 1 Oct 2025 09:59:52 +0300 Subject: [PATCH 52/65] Update en/python-net/converting/convert-html-to-pdf/_index.md Co-authored-by: Andriy Andruhovski --- en/python-net/converting/convert-html-to-pdf/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-html-to-pdf/_index.md b/en/python-net/converting/convert-html-to-pdf/_index.md index 9a9377331..43421dfd9 100644 --- a/en/python-net/converting/convert-html-to-pdf/_index.md +++ b/en/python-net/converting/convert-html-to-pdf/_index.md @@ -78,7 +78,7 @@ This example shows how to convert an HTML file to PDF using Aspose.PDF for Pytho ## Convert HTML to PDF priority CSS Page Rule -Some documents may contain layout settings that utilize the Page rule, which can create ambiguity when generating the layout. You can control the priority using the 'is_priority_css_page_rule' property. If this property is set to 'True', the CSS rule is applied first. +Some documents may contain layout settings that utilize [the Page rule](https://developer.mozilla.org/en-US/docs/Web/CSS/@page), which can create ambiguity when generating the layout. You can control the priority using the 'is_priority_css_page_rule' property. If this property is set to 'True', the CSS rule is applied first. 1. Create an instance of the [HtmlLoadOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/htmlloadoptions/) class. 1. Set 'is_priority_css_page_rule = False' to disable prioritizing @page CSS rules, allowing other styles to take precedence. 
From a63f7d341a139d99649c60125c9751782eed80ac Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Wed, 1 Oct 2025 10:00:03 +0300 Subject: [PATCH 53/65] Update en/python-net/converting/convert-html-to-pdf/_index.md Co-authored-by: Andriy Andruhovski --- en/python-net/converting/convert-html-to-pdf/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-html-to-pdf/_index.md b/en/python-net/converting/convert-html-to-pdf/_index.md index 43421dfd9..4eb99d46e 100644 --- a/en/python-net/converting/convert-html-to-pdf/_index.md +++ b/en/python-net/converting/convert-html-to-pdf/_index.md @@ -51,7 +51,7 @@ Aspose presents you online free application ["HTML to PDF"](https://products.asp [![Aspose.PDF Convertion HTML to PDF using Free App](html.png)](https://products.aspose.app/html/en/conversion/html-to-pdf) {{% /alert %}} -## Convert HTML to PDF media type +## Convert HTML to PDF using media type This example shows how to convert an HTML file to PDF using Aspose.PDF for Python via .NET with specific rendering options. 
From 4b258fa9fe82a26d6cc0c01ae8ce3a67a3cbfbf2 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Wed, 1 Oct 2025 10:07:00 +0300 Subject: [PATCH 54/65] Update en/python-net/converting/convert-images-format-to-pdf/_index.md Co-authored-by: Andriy Andruhovski --- en/python-net/converting/convert-images-format-to-pdf/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-images-format-to-pdf/_index.md b/en/python-net/converting/convert-images-format-to-pdf/_index.md index 47063c79a..a185cd41b 100644 --- a/en/python-net/converting/convert-images-format-to-pdf/_index.md +++ b/en/python-net/converting/convert-images-format-to-pdf/_index.md @@ -22,7 +22,7 @@ Abstract: This article provides a comprehensive guide on converting various imag Convert BMP files to PDF document using **Aspose.PDF for Python via .NET** library. -BMPimages are Files having extension. BMP represent Bitmap Image files that are used to store bitmap digital images. These images are independent of graphics adapter and are also called device independent bitmap (DIB) file format. +BMP images are files with the .BMP extension. They represent Bitmap Image files that are used to store bitmap digital images. These images are independent of graphics adapter and are also called device independent bitmap (DIB) file format. You can convert BMP to PDF files with Aspose.PDF for Python via .NET API.
Therefore, you can follow the following steps to convert BMP images: From d9dc9097e127ce84c392b55d1dbf4e16e448b026 Mon Sep 17 00:00:00 2001 From: Anastasiia Holub Date: Wed, 1 Oct 2025 10:07:52 +0300 Subject: [PATCH 55/65] Add HTML and PPTX abbreviations in conversion documentation --- en/python-net/converting/convert-pdf-to-html/_index.md | 2 +- en/python-net/converting/convert-pdf-to-powerpoint/_index.md | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/en/python-net/converting/convert-pdf-to-html/_index.md b/en/python-net/converting/convert-pdf-to-html/_index.md index 6d4a8b261..634e8dab0 100644 --- a/en/python-net/converting/convert-pdf-to-html/_index.md +++ b/en/python-net/converting/convert-pdf-to-html/_index.md @@ -16,7 +16,7 @@ Abstract: This article provides a comprehensive guide on converting PDF files to ## Convert PDF to HTML -**Aspose.PDF for Python via .NET** provides many features for converting various file formats into PDF documents and converting PDF files into various output formats. This article discusses how to convert a PDF file into HTML. You can use just a couple of lines of code Python for converting PDF To HTML. You may need to convert PDF to HTML if you want to create a website or add content to an online forum. One way to convert PDF to HTML is to programmatically use Python. +**Aspose.PDF for Python via .NET** provides many features for converting various file formats into PDF documents and converting PDF files into various output formats. This article discusses how to convert a PDF file into HTML. You can use just a couple of lines of Python code to convert PDF to HTML. You may need to convert PDF to HTML if you want to create a website or add content to an online forum. One way to convert PDF to HTML is to do it programmatically with Python.
{{% alert color="success" %}} **Try to convert PDF to HTML online** diff --git a/en/python-net/converting/convert-pdf-to-powerpoint/_index.md b/en/python-net/converting/convert-pdf-to-powerpoint/_index.md index a8f0cc306..8c2522a87 100644 --- a/en/python-net/converting/convert-pdf-to-powerpoint/_index.md +++ b/en/python-net/converting/convert-pdf-to-powerpoint/_index.md @@ -18,7 +18,7 @@ Abstract: This article provides a comprehensive guide on converting PDF files in **Aspose.PDF for Python via .NET** lets you track the progress of PDF to PPTX conversion. -We have an API named Aspose.Slides which offers the feature to create as well as manipulate PPT/PPTX presentations. This API also provides the feature to convert PPT/PPTX files to PDF format. During this conversion, the individual pages of the PDF file are converted to separate slides in the PPTX file. +We have an API named Aspose.Slides which offers the feature to create as well as manipulate PPT/PPTX presentations. This API also provides the feature to convert PPTX files to PDF format. During this conversion, the individual pages of the PDF file are converted to separate slides in the PPTX file. During PDF to PPTX conversion, the text is rendered as Text where you can select/update it. Please note that in order to convert PDF files to PPTX format, Aspose.PDF provides a class named [PptxSaveOptions](https://reference.aspose.com/pdf/python-net/aspose.pdf/pptxsaveoptions/). An object of the PptxSaveOptions class is passed as a second argument to the [save()](https://reference.aspose.com/pdf/python-net/aspose.pdf/document/#methods). The following code snippet shows the process for converting PDF files into PPTX format. 
From e3f3e01540079496c42329e5df3dacb2cc06ed80 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Wed, 1 Oct 2025 10:08:26 +0300 Subject: [PATCH 56/65] Update en/python-net/advanced-operations/working-with-images/delete-images-from-pdf-file/_index.md Co-authored-by: Andriy Andruhovski --- .../working-with-images/delete-images-from-pdf-file/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/advanced-operations/working-with-images/delete-images-from-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-images/delete-images-from-pdf-file/_index.md index af01f8fd9..6dd9c4aa2 100644 --- a/en/python-net/advanced-operations/working-with-images/delete-images-from-pdf-file/_index.md +++ b/en/python-net/advanced-operations/working-with-images/delete-images-from-pdf-file/_index.md @@ -5,7 +5,7 @@ type: docs weight: 20 url: /python-net/delete-images-from-pdf-file/ description: This section explain how to delete Images from PDF File using Aspose.PDF for Python via .NET. -lastmod: "2025-09 -27" +lastmod: "2025-09-27" TechArticle: true AlternativeHeadline: How to remove images from PDF using Python Abstract: The article discusses the various reasons for removing images from PDF files, such as protecting privacy, preventing unauthorized access to sensitive information, reducing file size for easier sharing and storage, and preparing the document for compression or text extraction. It introduces **Aspose.PDF for Python via .NET** as a tool to accomplish this task. The article provides step-by-step instructions and code snippets for deleting specific images or all images from a PDF file using Aspose.PDF. The process involves opening an existing PDF document, deleting images either individually or in bulk, and saving the updated file. The provided Python code demonstrates how to remove images by accessing the document's resources and modifying the desired pages. 
From adae019080f949819c2b8a39ba0b2d746de60b17 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Wed, 1 Oct 2025 10:08:38 +0300 Subject: [PATCH 57/65] Update en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md Co-authored-by: Andriy Andruhovski --- .../working-with-images/extract-images-from-pdf-file/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md index 5a6453333..e2cad746f 100644 --- a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md +++ b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md @@ -14,7 +14,7 @@ Abstract: This article discusses the process of extracting images from PDF files Do you need to separate images from your PDF files? For simplified management, archiving, analysis, or sharing images of your documents, use **Aspose.PDF for Python** and extract images from PDF files. 1. Load the PDF document with 'ap.Document()'. -1. Access the first page of the document (document.pages[1]). +1. Access the desired page of the document (document.pages[1]). 1. Select the first image from the page resources (resources.images[1]). 1. Create an output stream (FileIO) for the target file. 1. Save the extracted image using 'xImage.save(output_image)'. 
From 07643e5bcd7b413fa6351c3c693af6f6b3c49123 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Wed, 1 Oct 2025 10:08:50 +0300 Subject: [PATCH 58/65] Update en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md Co-authored-by: Andriy Andruhovski --- .../working-with-images/extract-images-from-pdf-file/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md index e2cad746f..b6d4b87c7 100644 --- a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md +++ b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md @@ -15,7 +15,7 @@ Do you need to separate images from your PDF files? For simplified management, a 1. Load the PDF document with 'ap.Document()'. 1. Access the desired page of the document (document.pages[1]). -1. Select the first image from the page resources (resources.images[1]). +1. Select the image from the page resources (for example, resources.images[1]). 1. Create an output stream (FileIO) for the target file. 1. Save the extracted image using 'xImage.save(output_image)'. 
From 7c5594c4aa5c7500600a604e7bef1547a206d4f3 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Wed, 1 Oct 2025 10:08:56 +0300 Subject: [PATCH 59/65] Update en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md Co-authored-by: Andriy Andruhovski --- .../working-with-images/extract-images-from-pdf-file/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md index b6d4b87c7..ca70041f8 100644 --- a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md +++ b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md @@ -77,7 +77,7 @@ This method extracts images located within a specified rectangular region on a P ## Extract Image Information from PDF -This example demonstrates how to analyze images embedded in a PDF page and calculate their effective resolution. +The example below demonstrates how to analyze images embedded in a PDF page and calculate their effective resolution. 1. Open the PDF with 'ap.Document'. 1. Track graphics state while reading page content. 
From fcaa343d2e23351c34ea35cc5005d379ec6e6bb4 Mon Sep 17 00:00:00 2001 From: AnHolub <68945813+AnHolub@users.noreply.github.com> Date: Wed, 1 Oct 2025 10:09:05 +0300 Subject: [PATCH 60/65] Update en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md Co-authored-by: Andriy Andruhovski --- .../working-with-images/extract-images-from-pdf-file/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md index ca70041f8..f5e5d53eb 100644 --- a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md +++ b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md @@ -36,7 +36,7 @@ Do you need to separate images from your PDF files? For simplified management, a ## Extract Images from Specific Region in PDF -This method extracts images located within a specified rectangular region on a PDF page and saves them as separate files. +This example extracts images located within a specified rectangular region on a PDF page and saves them as separate files. 1. Load the PDF document using 'ap.Document'. 1. Create an 'ImagePlacementAbsorber' to collect all images on the first page. 
From 4396619076cb700e32e1d4130f344c639866ba43 Mon Sep 17 00:00:00 2001 From: Anastasiia Holub Date: Wed, 1 Oct 2025 10:17:04 +0300 Subject: [PATCH 61/65] Refine descriptions for image handling examples in PDF documentation --- .../add-image-to-existing-pdf-file/_index.md | 2 +- .../working-with-images/extract-images-from-pdf-file/_index.md | 2 +- .../replace-image-in-existing-pdf-file/_index.md | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/en/python-net/advanced-operations/working-with-images/add-image-to-existing-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-images/add-image-to-existing-pdf-file/_index.md index e420539ed..b25d107ef 100644 --- a/en/python-net/advanced-operations/working-with-images/add-image-to-existing-pdf-file/_index.md +++ b/en/python-net/advanced-operations/working-with-images/add-image-to-existing-pdf-file/_index.md @@ -122,7 +122,7 @@ Steps: ## Add Image with Alternative Text -This example demonstrates how to add an image to a PDF page and assign alternative text (alt text) for accessibility compliance (such as PDF/UA). +This example shows how to add an image to a PDF page and assign alternative text (alt text) for accessibility compliance (such as PDF/UA). 1. Create a new 'Document' and add a page (842 × 595, landscape A4). 1. Place the image on the page using 'page.add_image()' with a rectangle that spans the full page. 
diff --git a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md index 5a6453333..e54594e8c 100644 --- a/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md +++ b/en/python-net/advanced-operations/working-with-images/extract-images-from-pdf-file/_index.md @@ -77,7 +77,7 @@ This method extracts images located within a specified rectangular region on a P ## Extract Image Information from PDF -This example demonstrates how to analyze images embedded in a PDF page and calculate their effective resolution. +This example shows how to analyze images embedded in a PDF page and calculate their effective resolution. 1. Open the PDF with 'ap.Document'. 1. Track graphics state while reading page content. diff --git a/en/python-net/advanced-operations/working-with-images/replace-image-in-existing-pdf-file/_index.md b/en/python-net/advanced-operations/working-with-images/replace-image-in-existing-pdf-file/_index.md index 7671d179d..3b203307e 100644 --- a/en/python-net/advanced-operations/working-with-images/replace-image-in-existing-pdf-file/_index.md +++ b/en/python-net/advanced-operations/working-with-images/replace-image-in-existing-pdf-file/_index.md @@ -13,7 +13,7 @@ Abstract: The Aspose.PDF for Python via .NET documentation provides a comprehens ## Replace an Image in PDF -This example demonstrates how to replace an existing image on a PDF page with a new image using Aspose.PDF for Python via .NET. +Need to replace an existing image on a PDF page with a new one? The example below shows how to do it using Aspose.PDF for Python via .NET. 1. Import necessary modules (aspose.pdf, os.path, FileIO). 1. 
Define paths for: From 5b7513ba105564b9ca248049797b62b840afcbf0 Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Wed, 1 Oct 2025 17:02:26 +0300 Subject: [PATCH 62/65] Update en/python-net/converting/convert-pdf-to-pdfx/_index.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- en/python-net/converting/convert-pdf-to-pdfx/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-pdf-to-pdfx/_index.md b/en/python-net/converting/convert-pdf-to-pdfx/_index.md index 532374225..b4c4d4bd4 100644 --- a/en/python-net/converting/convert-pdf-to-pdfx/_index.md +++ b/en/python-net/converting/convert-pdf-to-pdfx/_index.md @@ -139,7 +139,7 @@ Next code snippet demonstrates how to embed external files into a PDF and then c fileSpecification = ap.FileSpecification(self.data_dir + "aspose-logo.jpg", "Large Image file") document.embedded_files.add(fileSpecification) - document.convert(path_logfile, ap.PdfFormat.PdfFormat.PDF_A_3A, ap.ConvertErrorAction.DELETE) + document.convert(path_logfile, ap.PdfFormat.PDF_A_3A, ap.ConvertErrorAction.DELETE) document.save(path_outfile) print(infile + " converted into " + outfile) ``` From fe3ebf341f6b7a8f4de4e21237f366cbb0366251 Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Wed, 1 Oct 2025 17:03:28 +0300 Subject: [PATCH 63/65] Apply suggestion from @Copilot Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- .../working-with-documents/working-with-layers/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/advanced-operations/working-with-documents/working-with-layers/_index.md b/en/python-net/advanced-operations/working-with-documents/working-with-layers/_index.md index e54be37da..a29ec7016 100644 --- a/en/python-net/advanced-operations/working-with-documents/working-with-layers/_index.md +++ b/en/python-net/advanced-operations/working-with-documents/working-with-layers/_index.md @@ -3,7 +3,7 @@ 
title: Work with PDF layers using Python linktitle: Work with PDF layers type: docs weight: 50 -url: /net/working-with-pdf-layers/ +url: /python-net/working-with-pdf-layers/ description: The next task explains how to lock a PDF layer, extract PDF layer elements, flatten a layered PDF, and merge all layers inside PDF into one. lastmod: "2025-09-17" sitemap: From 92263201ad16bed6b866798d533f1ea5ae71b6aa Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Wed, 1 Oct 2025 21:59:55 +0300 Subject: [PATCH 64/65] Update en/python-net/converting/convert-pdf-to-pdfx/_index.md Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> --- en/python-net/converting/convert-pdf-to-pdfx/_index.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/en/python-net/converting/convert-pdf-to-pdfx/_index.md b/en/python-net/converting/convert-pdf-to-pdfx/_index.md index b4c4d4bd4..084249681 100644 --- a/en/python-net/converting/convert-pdf-to-pdfx/_index.md +++ b/en/python-net/converting/convert-pdf-to-pdfx/_index.md @@ -11,7 +11,7 @@ sitemap: priority: 0.8 TechArticle: true AlternativeHeadline: How to convert PDF to PDF/x formats -Abstract: The article provides a comprehensive guide on converting PDF to PDF/A, PDF/E and PDF/X formats using Aspose.PDF for Python. +Abstract: The article provides a comprehensive guide on converting PDF to PDF/A, PDF/E, and PDF/X formats using Aspose.PDF for Python. 
--- **PDF to PDF/x format means the ability to convert PDF to additional formats, namely PDF/A, PDF/E and PDF/X.** From bc138263a71c0ff29184dbcc61735ea91d8155a5 Mon Sep 17 00:00:00 2001 From: Andriy Andruhovski Date: Thu, 2 Oct 2025 15:45:14 +0300 Subject: [PATCH 65/65] Fix formatting and improve image processing logic in PDF extraction example --- .../_index.md | 26 +++++++------------ 1 file changed, 9 insertions(+), 17 deletions(-) diff --git a/en/python-net/advanced-operations/working-with-images/search-and-get-images-from-pdf-document/_index.md b/en/python-net/advanced-operations/working-with-images/search-and-get-images-from-pdf-document/_index.md index ac31fbfdd..4810048af 100644 --- a/en/python-net/advanced-operations/working-with-images/search-and-get-images-from-pdf-document/_index.md +++ b/en/python-net/advanced-operations/working-with-images/search-and-get-images-from-pdf-document/_index.md @@ -7,7 +7,7 @@ url: /python-net/search-and-get-images-from-pdf-document/ description: Learn how to search and get images from the PDF document in Python using Aspose.PDF. lastmod: "2025-09-17" TechArticle: true -AlternativeHeadline: Searching and Extracting Images from PDF +AlternativeHeadline: Searching and Extracting Images from PDF Abstract: The Aspose.PDF for Python via .NET library offers robust capabilities for searching and extracting images from PDF documents. Utilizing the 'ImagePlacementAbsorber' class, developers can efficiently locate and access images embedded across all pages of a PDF. 
--- @@ -112,7 +112,6 @@ This function analyzes all images on the first page of a PDF and calculates thei from aspose.pycore import cast, is_assignable from os import path - path_infile = path.join(self.data_dir, infile) document = ap.Document(path_infile) @@ -120,23 +119,20 @@ This function analyzes all images on the first page of a PDF and calculates thei default_resolution = 72 graphics_state = [] - # Collect image names from page resources image_names = list(document.pages[1].resources.images.names) - # Push identity matrix to stack - graphics_state.append(Matrix23(1, 0, 0, 1, 0, 0)) + graphics_state.append(drawing.drawing2d.Matrix(float(1), float(0), float(0), float(1), float(0), float(0))) - # Process operators on first page for op in document.pages[1].contents: if is_assignable(op, ap.operators.GSave): - graphics_state.append(graphics_state[-1]) + graphics_state.append(cast(drawing.drawing2d.Matrix, graphics_state[-1]).clone()) elif is_assignable(op, ap.operators.GRestore): graphics_state.pop() elif is_assignable(op, ap.operators.ConcatenateMatrix): opCM = cast(ap.operators.ConcatenateMatrix, op) - cm = Matrix23( + cm = drawing.drawing2d.Matrix( float(opCM.matrix.a), float(opCM.matrix.b), float(opCM.matrix.c), @@ -145,29 +141,25 @@ This function analyzes all images on the first page of a PDF and calculates thei float(opCM.matrix.f), ) - graphics_state[-1] = graphics_state[-1] * cm + graphics_state[-1].multiply(cm) continue elif is_assignable(op, ap.operators.Do): opDo = cast(ap.operators.Do, op) if opDo.name in image_names: - last_ctm = graphics_state[-1] + last_ctm = cast(drawing.drawing2d.Matrix, graphics_state[-1]) index = image_names.index(opDo.name) + 1 image = document.pages[1].resources.images[index] - # Calculate scaled dimensions - scaled_width = math.sqrt(last_ctm.a**2 + last_ctm.b**2) - scaled_height = math.sqrt(last_ctm.c**2 + last_ctm.d**2) + scaled_width = math.sqrt(last_ctm.elements[0] ** 2 + last_ctm.elements[1] ** 2) + scaled_height = 
math.sqrt(last_ctm.elements[2] ** 2 + last_ctm.elements[3] ** 2) - # Original dimensions original_width = image.width original_height = image.height - # Compute resolution res_horizontal = original_width * default_resolution / scaled_width res_vertical = original_height * default_resolution / scaled_height - # Output info print( f"{self.data_dir}image {opDo.name} " f"({scaled_width:.2f}:{scaled_height:.2f}): " @@ -208,4 +200,4 @@ This function retrieves alternative text (alt text) from all images on the first ) lines = image_placement.image.get_alternative_text(page) print("Alt Text: " + lines[0]) -``` \ No newline at end of file +``` \ No newline at end of file