diff --git a/src/foundation/src/PDFsharp/src/PdfSharp/Internal/Diagnostics.cs b/src/foundation/src/PDFsharp/src/PdfSharp/Internal/Diagnostics.cs
index 6b7b4bf4..35abe830 100644
--- a/src/foundation/src/PDFsharp/src/PdfSharp/Internal/Diagnostics.cs
+++ b/src/foundation/src/PDFsharp/src/PdfSharp/Internal/Diagnostics.cs
@@ -73,11 +73,11 @@ public static void HandleUnexpectedCharacter(char ch)
"If you think this is a bug in PDFsharp, please send us your PDF file.", (int)ch);
ThrowParserException(message);
}
- public static void HandleUnexpectedToken(string token)
+ public static void HandleUnexpectedToken(string token, int position)
{
string message = String.Format(CultureInfo.InvariantCulture,
- "Unexpected token '{0}' in PDF stream. The file may be corrupted. " +
- "If you think this is a bug in PDFsharp, please send us your PDF file.", token);
+ "Unexpected token '{0}' at position {1} in PDF stream. The file may be corrupted. " +
+ "If you think this is a bug in PDFsharp, please send us your PDF file.", token, position);
ThrowParserException(message);
}
}
diff --git a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Advanced/PdfCrossReferenceStream.cs b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Advanced/PdfCrossReferenceStream.cs
index 6c3de94c..67baf9e9 100644
--- a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Advanced/PdfCrossReferenceStream.cs
+++ b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Advanced/PdfCrossReferenceStream.cs
@@ -12,7 +12,7 @@ namespace PdfSharp.Pdf.Advanced
sealed class PdfCrossReferenceStream : PdfTrailer // Reference: 3.4.7 Cross-Reference Streams / Page 106
{
///
- /// Initializes a new instance of the class.
+ /// Initializes a new instance of the <see cref="PdfCrossReferenceStream"/> class.
///
public PdfCrossReferenceStream(PdfDocument document)
: base(document)
diff --git a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Advanced/PdfObjectStream.cs b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Advanced/PdfObjectStream.cs
index bbe0ced5..4ecef76c 100644
--- a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Advanced/PdfObjectStream.cs
+++ b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Advanced/PdfObjectStream.cs
@@ -49,37 +49,6 @@ internal PdfObjectStream(PdfDictionary dict)
#endif
}
- ///
- /// Reads the compressed object with the specified index.
- ///
- internal void ReadReferences(PdfCrossReferenceTable xrefTable)
- {
- ////// Create parser for stream.
- ////Parser parser = new Parser(_document, new MemoryStream(Stream.Value));
- for (int idx = 0; idx < _header.Length; idx++)
- {
- int objectNumber = _header[idx][0];
- int offset = _header[idx][1];
-
- PdfObjectID objectID = new PdfObjectID(objectNumber);
-
- // HACK: -1 indicates compressed object.
- PdfReference iref = new PdfReference(objectID, -1);
- ////iref.ObjectID = objectID;
- ////iref.Value = xrefStream;
- if (!xrefTable.Contains(iref.ObjectID))
- {
- xrefTable.Add(iref);
- }
- else
- {
-#if DEBUG
- GetType();
-#endif
- }
- }
- }
-
///
/// Reads the compressed object with the specified index.
///
@@ -108,7 +77,7 @@ public class Keys : PdfStream.Keys
///
/// (Required) The type of PDF object that this dictionary describes;
- /// must be ObjStmfor an object stream.
+ /// must be ObjStm for an object stream.
///
[KeyInfo(KeyType.Name | KeyType.Required, FixedValue = "ObjStm")]
public const string Type = "/Type";
@@ -130,7 +99,7 @@ public class Keys : PdfStream.Keys
/// (Optional) A reference to an object stream, of which the current object
/// stream is considered an extension. Both streams are considered part of
/// a collection of object streams (see below). A given collection consists
- /// of a set of streams whose Extendslinks form a directed acyclic graph.
+ /// of a set of streams whose Extends links form a directed acyclic graph.
///
[KeyInfo(KeyType.Stream | KeyType.Optional)]
public const string Extends = "/Extends";
diff --git a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Advanced/PdfReference.cs b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Advanced/PdfReference.cs
index 1c4ba091..b8c62596 100644
--- a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Advanced/PdfReference.cs
+++ b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Advanced/PdfReference.cs
@@ -17,7 +17,7 @@ namespace PdfSharp.Pdf.Advanced
/// Represents an indirect reference to a PdfObject.
///
[DebuggerDisplay("iref({ObjectNumber}, {GenerationNumber})")]
- public sealed class PdfReference : PdfItem
+ public class PdfReference : PdfItem
{
// About PdfReference
//
@@ -154,7 +154,7 @@ public int Position
///
/// Gets or sets the referenced PdfObject.
///
- public PdfObject Value
+ public virtual PdfObject Value
{
get => _value;
set
@@ -246,4 +246,93 @@ public int Compare(PdfReference? l, PdfReference? r)
int _uid;
#endif
}
+
+ /// <summary>
+ /// Represents an indirect reference to an object stored in an object stream (<see cref="PdfObjectStream"/>).
+ /// The value of this object is "lazily" loaded when first accessed.
+ /// </summary>
+ public sealed class PdfReferenceToCompressedObject : PdfReference
+ {
+ private readonly int _objectStreamNumber; // Object number of the object stream that contains this object.
+ private readonly int _indexInObjectStream; // Index of this object within that object stream.
+
+ internal PdfReferenceToCompressedObject(PdfDocument doc, PdfObjectID objectID,
+ int objectStreamNumber, int indexInObjectStream)
+ : base(objectID, -1) // Position -1 marks a reference to a compressed object.
+ {
+ Document = doc ?? throw new ArgumentNullException(nameof(doc));
+ _objectStreamNumber = objectStreamNumber;
+ _indexInObjectStream = indexInObjectStream;
+ }
+
+ public override PdfObject Value
+ {
+ get
+ {
+ if (base.Value is null) // Not loaded yet: read it from the object stream on first access.
+ {
+ ReadValue();
+ }
+ return base.Value!; // Still null if ReadValue() could not resolve the object.
+ }
+ set => base.Value = value;
+ }
+
+ /// <summary>
+ /// Reads the value of this object from its object stream, loading (and decrypting) that stream first if needed.
+ /// </summary>
+ void ReadValue()
+ {
+ PdfObjectStream? ostm = null;
+ var stmObjID = new PdfObjectID(_objectStreamNumber);
+ // Look up the reference to the containing object stream.
+ var streamRef = Document.IrefTable[stmObjID];
+ if (streamRef is not null) // Otherwise nothing is read and the value stays unset.
+ {
+ if (streamRef.Value is null)
+ {
+ // Object stream not loaded yet: read it now with the document's parser.
+ var parser = Document.GetParser()!;
+ var state = parser.SaveState(); // Remember the lexer state so it can be restored after reading.
+ var obj = parser.ReadObject(null, stmObjID, false, false);
+ if (obj is PdfDictionary ostmDict)
+ {
+ // Decrypt if necessary.
+ // This must be done before the type transformation because PdfObjectStream
+ // tries to parse the stream header in its constructor.
+ Document.EffectiveSecurityHandler?.DecryptObject(ostmDict);
+ ostm = new PdfObjectStream(ostmDict);
+ }
+ parser.RestoreState(state);
+ Debug.Assert(ostm != null, "Object stream should not be null here");
+ }
+ // Loaded, but not yet transformed into a PdfObjectStream?
+ else if (streamRef.Value is not PdfObjectStream existingOstm)
+ {
+ if (streamRef.Value is PdfDictionary ostmDict)
+ {
+ // Decrypt if necessary.
+ Document.EffectiveSecurityHandler?.DecryptObject(ostmDict);
+ ostm = new PdfObjectStream(ostmDict);
+ }
+ Debug.Assert(ostm != null, "Object stream should not be null here");
+ }
+ else
+ ostm = existingOstm;
+
+ if (ostm is not null)
+ {
+ // Store the loaded and decrypted object stream on its reference.
+ streamRef.Value = ostm;
+ // Read the actual object we are looking for.
+ var iref = ostm.ReadCompressedObject(_indexInObjectStream);
+ if (iref is not null)
+ {
+ Debug.Assert(iref.ObjectID == ObjectID, "ObjectID mismatch");
+ base.Value = iref.Value;
+ }
+ }
+ }
+ }
+ }
}
diff --git a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/Lexer.cs b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/Lexer.cs
index b6f7c590..9909ce62 100644
--- a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/Lexer.cs
+++ b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/Lexer.cs
@@ -937,8 +937,8 @@ public double TokenToReal
{
get
{
- // ReSharper disable once CompareOfFloatsByEqualityOperator
- Debug.Assert(_tokenAsReal == double.Parse(_token.ToString(), CultureInfo.InvariantCulture));
+ // had several documents where the assertion failed with an equality comparison (==)
+ Debug.Assert(Math.Abs(_tokenAsReal - double.Parse(_token.ToString(), CultureInfo.InvariantCulture)) < 0.000000001);
return _tokenAsReal;
}
}
diff --git a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/Parser.cs b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/Parser.cs
index 1dc257f9..3b387e34 100644
--- a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/Parser.cs
+++ b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/Parser.cs
@@ -42,10 +42,10 @@ public Parser(PdfDocument? document, Stream pdf)
_stack = new ShiftStack();
}
- public Parser(PdfDocument document)
+ public Parser(PdfDocument document, Lexer lexer)
{
- _document = document;
- _lexer = document?._lexer ?? throw new ArgumentNullException(nameof(document), "Lexer not defined.");
+ _document = document ?? throw new ArgumentNullException(nameof(document));
+ _lexer = lexer ?? throw new ArgumentNullException(nameof(lexer));
_stack = new ShiftStack();
}
@@ -105,7 +105,7 @@ public PdfObject ReadObject(PdfObject? pdfObject, PdfObjectID objectID, bool inc
{
// Attempt to read an object that was already registered. Keep the former object.
// This only happens with corrupt PDF files that have duplicate IDs.
- if (iref.Value != null!)
+ if (iref is not PdfReferenceToCompressedObject && iref.Value != null!)
{
LogHost.Logger.LogWarning("Another instance of object {iref} was found. Using previously encountered object instead.", iref);
// Attempt to read an object that was already read. Keep the former object.
@@ -258,12 +258,12 @@ public PdfObject ReadObject(PdfObject? pdfObject, PdfObjectID objectID, bool inc
case Symbol.Keyword:
// Should not come here anymore.
- ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
+ ParserDiagnostics.HandleUnexpectedToken(_lexer.Token, _lexer.Position - _lexer.Token.Length);
break;
default:
// Should not come here anymore.
- ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
+ ParserDiagnostics.HandleUnexpectedToken(_lexer.Token, _lexer.Position - _lexer.Token.Length);
break;
}
symbol = ScanNextToken();
@@ -344,19 +344,10 @@ int GetStreamLength(PdfDictionary dict)
{
#if true
object length;
- if (reference.Position == -1 && reference.Value != null!)
- {
- if (reference.Value is not PdfIntegerObject integer)
- throw new InvalidOperationException("Cannot retrieve stream length from stream object.");
+ if (reference.Value is not PdfIntegerObject integer)
+ throw new InvalidOperationException("Cannot retrieve stream length from stream object.");
- length = integer;
- }
- else
- {
- ParserState state = SaveState();
- length = ReadObject(null, reference.ObjectID, false, false);
- RestoreState(state);
- }
+ length = integer;
#else
ParserState state = SaveState();
object length = ReadObject(null, reference.ObjectID, false, false);
@@ -558,7 +549,7 @@ void ParseObject(Symbol stop)
//case Symbol.StartXRef:
//case Symbol.Eof:
default:
- ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
+ ParserDiagnostics.HandleUnexpectedToken(_lexer.Token, _lexer.Position - _lexer.Token.Length);
SkipCharsUntil(stop);
return;
}
@@ -673,7 +664,7 @@ Symbol ReadSymbol(Symbol symbol)
}
Symbol current = _lexer.ScanNextToken();
if (symbol != current)
- ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
+ ParserDiagnostics.HandleUnexpectedToken(_lexer.Token, _lexer.Position - _lexer.Token.Length);
return current;
}
@@ -684,7 +675,7 @@ Symbol ReadToken(string token)
{
Symbol current = _lexer.ScanNextToken();
if (token != _lexer.Token)
- ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
+ ParserDiagnostics.HandleUnexpectedToken(_lexer.Token, _lexer.Position - _lexer.Token.Length);
return current;
}
@@ -696,7 +687,7 @@ string ReadName()
string name;
Symbol symbol = ScanNextToken(out name);
if (symbol != Symbol.Name)
- ParserDiagnostics.HandleUnexpectedToken(name);
+ ParserDiagnostics.HandleUnexpectedToken(name, _lexer.Position - name.Length);
return name;
}
@@ -773,7 +764,7 @@ int ReadInteger(bool canBeIndirect)
_lexer.Position = position;
return n;
}
- ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
+ ParserDiagnostics.HandleUnexpectedToken(_lexer.Token, _lexer.Position - _lexer.Token.Length);
return 0;
}
@@ -823,115 +814,9 @@ int ReadInteger()
// return parser.ReadObject(pdfObject, objectID, false);
// }
- ///
- /// Reads an object from the PDF input stream using the default parser.
- ///
- public static PdfObject ReadObject(PdfDocument owner, PdfObjectID objectID)
- {
- if (owner == null)
- throw new ArgumentNullException(nameof(owner));
-
- Parser parser = new Parser(owner);
- return parser.ReadObject(null, objectID, false, false);
- }
-
- ///
- /// Reads the irefs from the compressed object with the specified index in the object stream
- /// of the object with the specified object id.
- ///
- internal void ReadIRefsFromCompressedObject(PdfObjectID objectID)
- {
- Debug.Assert(_document.IrefTable.ObjectTable.ContainsKey(objectID));
- if (!_document.IrefTable.ObjectTable.TryGetValue(objectID, out var iref))
- {
- // We should never come here because the object stream must be a type 1 entry in the xref stream
- // and iref was created before.
- throw new NotImplementedException("This case is not coded or something else went wrong");
- }
-
- Debug.Assert(iref.Value != null, "The object shall be read in by PdfReader.ReadIndirectObjectsFromIrefTable() before accessing it.");
-
- if (iref.Value is not PdfObjectStream objectStreamStream)
- {
- Debug.Assert(((PdfDictionary)iref.Value).Elements.GetName("/Type") == "/ObjStm");
-
- objectStreamStream = new PdfObjectStream((PdfDictionary)iref.Value);
- Debug.Assert(objectStreamStream.Reference == iref);
- // objectStream.Reference = iref; Superfluous, see Assert in line before.
- Debug.Assert(objectStreamStream.Reference.Value != null, "Something went wrong.");
- }
- Debug.Assert(objectStreamStream != null);
-
- //PdfObjectStream objectStreamStream = (PdfObjectStream)iref.Value;
- if (objectStreamStream == null)
- throw new Exception("Something went wrong here.");
- objectStreamStream.ReadReferences(_document.IrefTable);
- }
-
- ///
- /// Reads the compressed object with the specified index in the object stream
- /// of the object with the specified object ID.
- ///
- internal PdfReference ReadCompressedObject(PdfObjectID objectID, int index)
- {
-#if true
- Debug.Assert(_document.IrefTable.ObjectTable.ContainsKey(objectID));
- if (!_document.IrefTable.ObjectTable.TryGetValue(objectID, out var iref))
- {
- throw new NotImplementedException("This case is not coded or something else went wrong");
- }
-#else
- // We should never come here because the object stream must be a type 1 entry in the xref stream
- // and iref was created before.
-
- // Has the specified object already an iref in the object table?
- if (!_document._irefTable.ObjectTable.TryGetValue(objectID, out iref))
- {
- try
- {
-#if true_
- iref = new PdfReference(objectID,);
- iref.ObjectID = objectID;
- _document._irefTable.Add(os);
-#else
- PdfDictionary dict = (PdfDictionary)ReadObject(null, objectID, false, false);
- PdfObjectStream os = new PdfObjectStream(dict);
- iref = new PdfReference(os);
- iref.ObjectID = objectID;
- _document._irefTable.Add(os);
-#endif
- }
- catch (Exception ex)
- {
- Debug.WriteLine(ex.Message);
- throw;
- }
- }
-#endif
-
- Debug.Assert(iref.Value != null, "The object shall be read in by PdfReader.ReadIndirectObjectsFromIrefTable() before accessing it.");
-
- var objectStreamStream = iref.Value as PdfObjectStream;
- if (objectStreamStream == null)
- {
- Debug.Assert(((PdfDictionary)iref.Value).Elements.GetName("/Type") == "/ObjStm");
-
- objectStreamStream = new PdfObjectStream((PdfDictionary)iref.Value);
- Debug.Assert(objectStreamStream.Reference == iref);
- // objectStream.Reference = iref; Superfluous, see Assert in line before.
- Debug.Assert(objectStreamStream.Reference.Value != null, "Something went wrong.");
- }
- Debug.Assert(objectStreamStream != null);
-
- //PdfObjectStream objectStreamStream = (PdfObjectStream)iref.Value;
- if (objectStreamStream == null)
- throw new Exception("Something went wrong here.");
- return objectStreamStream.ReadCompressedObject(index);
- }
-
///
/// Reads the compressed object with the specified number at the given offset.
- /// The parser must be initialized with the stream an object stream object.
+ /// The parser must be initialized with the stream of an object stream object.
///
internal PdfReference ReadCompressedObject(int objectNumber, int offset)
{
@@ -1111,7 +996,7 @@ internal PdfTrailer ReadTrailer()
return trailer;
}
else
- ParserDiagnostics.HandleUnexpectedToken(_lexer.Token);
+ ParserDiagnostics.HandleUnexpectedToken(_lexer.Token, _lexer.Position - _lexer.Token.Length);
}
}
// ReSharper disable once RedundantIfElseBlock because of code readability.
@@ -1345,7 +1230,6 @@ PdfTrailer ReadXRefStream(PdfCrossReferenceTable xrefTable)
#endif
// Add iref for all uncompressed objects.
xrefTable.Add(new PdfReference(objectID, position));
-
}
#if DEBUG
else
@@ -1356,7 +1240,14 @@ PdfTrailer ReadXRefStream(PdfCrossReferenceTable xrefTable)
break;
case 2:
- // Nothing to do yet.
+ // object-stream number / index in object-stream
+ // collect irefs for objects stored in object streams
+ objectID = new PdfObjectID(subsections[ssc][0] + idx, 0);
+ if (!xrefTable.Contains(objectID))
+ {
+ xrefTable.Add(new PdfReferenceToCompressedObject(_document, objectID,
+ (int)item.Field2, (int)item.Field3));
+ }
break;
}
}
@@ -1735,7 +1626,7 @@ public static object Read(PdfObject o, string key)
}
*/
- ParserState SaveState()
+ internal ParserState SaveState()
{
var state = new ParserState
{
@@ -1745,13 +1636,13 @@ ParserState SaveState()
return state;
}
- void RestoreState(ParserState state)
+ internal void RestoreState(ParserState state)
{
_lexer.Position = state.Position;
_lexer.Symbol = state.Symbol;
}
- class ParserState
+ internal class ParserState
{
public int Position;
public Symbol Symbol;
diff --git a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/PdfReader.cs b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/PdfReader.cs
index 01162dee..f36a14b2 100644
--- a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/PdfReader.cs
+++ b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.IO/PdfReader.cs
@@ -259,7 +259,7 @@ public static PdfDocument Open(Stream stream, string? password, PdfDocumentOpenM
// After reading all objects, all documents placeholder references get replaced by references knowing their objects in FinishReferences(),
// which finally sets IsUnderConstruction to false.
document.IrefTable.IsUnderConstruction = true;
- var parser = new Parser(document);
+ var parser = document.GetParser()!;
// Read all trailers or cross-reference streams, but no objects.
document.Trailer = parser.ReadTrailer();
if (document.Trailer == null!)
@@ -324,16 +324,10 @@ public static PdfDocument Open(Stream stream, string? password, PdfDocumentOpenM
}
}
- // Read all file level indirect objects and decrypt them.
- // Objects stored in object streams are not yet included and must not be decrypted as they are not encrypted.
+ // Read all indirect objects and decrypt them.
+ // This includes objects stored in object-streams
ReadIndirectObjectsFromIrefTable(document, parser, true);
- // Read all indirect objects stored in object streams.
- ReadCompressedObjects(document, parser);
-
- // Read all not yet known indirect objects (the ones that were stored in object streams) and don't decrypt them, as they are not encrypted.
- ReadIndirectObjectsFromIrefTable(document, parser, false);
-
// Reset encryption so that it must be redefined to save the document encrypted.
effectiveSecurityHandler?.SetEncryptionToNoneAndResetPasswords();
@@ -412,7 +406,8 @@ static void ReadIndirectObjectsFromIrefTable(PdfDocument document, Parser parser
}
// Decrypt object, if needed.
- if (decrypt && iref.Value is { } pdfObject2)
+ // skip objects stored in object streams (iref.Position = -1)
+ if (decrypt && iref.Position >= 0 && iref.Value is { } pdfObject2)
effectiveSecurityHandler?.DecryptObject(pdfObject2);
// Set maximum object number.
@@ -420,57 +415,6 @@ static void ReadIndirectObjectsFromIrefTable(PdfDocument document, Parser parser
}
}
- static void ReadCompressedObjects(PdfDocument document, Parser parser)
- {
- // The PDF Reference 1.7 states in chapter 7.5.6 (Incremental Updates):
- // "When a conforming reader reads the file,
- // it shall build its cross-reference information in such a way that the
- // most recent copy of each object shall be the one accessed from the file."
-
- // IrefTable.AllReferences is sorted by ObjectId which gives older objects preference
- // (as they typically have lower ObjectIds).
- // For xref-streams, we revert the order, so that the most current object is read first.
- // This is because Parser.ReadObject does not overwrite an object that was already collected.
-
- // Collect xref streams.
- var xrefStreams = new List();
- foreach (var iref in document.IrefTable.AllReferences)
- {
- if (iref.Value is PdfCrossReferenceStream xrefStream)
- xrefStreams.Add(xrefStream);
- }
- // Sort them so the last xref stream is read first.
- // TODO: Is this always sufficient? (haven't found any issues so far testing with ~1300 PDFs...)
- xrefStreams.Sort((a, b) => (b.Reference?.Position ?? 0) - (a.Reference?.Position ?? 0));
-
-
-
- Dictionary objectStreams = new();
- foreach (var xrefStream in xrefStreams)
- {
- foreach (var item in xrefStream.Entries)
- {
- // Is type xref to compressed object?
- if (item.Type == 2)
- {
- int objectNumber = (int)item.Field2;
-
- if (!objectStreams.ContainsKey(objectNumber))
- {
- objectStreams.Add(objectNumber, null);
- var objectID = new PdfObjectID((int)item.Field2);
- parser.ReadIRefsFromCompressedObject(objectID);
- }
-
- PdfReference irefNew = parser.ReadCompressedObject(new PdfObjectID((int)item.Field2),
- (int)item.Field3);
- Debug.Assert(document.IrefTable.Contains(xrefStream.ObjectID));
- Debug.Assert(document.IrefTable.Contains(irefNew.ObjectID));
- }
- }
- }
- }
-
static void FinishReferences(PdfDocument document)
{
Debug.Assert(document.IrefTable.IsUnderConstruction);
@@ -555,6 +499,24 @@ static bool FinishReference(PdfReference currentReference, PdfDocument document,
var isChanged = false;
PdfItem? reference = currentReference;
+ if (reference is PdfReferenceToCompressedObject cref)
+ {
+ // replace with regular reference
+ // TODO: necessary ? does it make a difference ?
+ if (cref.Value is not null)
+ {
+ var newIref = new PdfReference(currentReference.ObjectID, -1)
+ {
+ Document = document,
+ Value = cref.Value
+ };
+ document.IrefTable.Remove(currentReference);
+ document.IrefTable.Add(newIref);
+ reference = newIref;
+ isChanged = true;
+ }
+ }
+
// The value of the reference may be null.
// If a file level PdfObject refers object stream level PdfObjects, that were not yet decompressed when reading it,
// placeholder references are used.
diff --git a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Security/PdfStandardSecurityHandler.cs b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Security/PdfStandardSecurityHandler.cs
index 74d48c7d..69fe28b2 100644
--- a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Security/PdfStandardSecurityHandler.cs
+++ b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf.Security/PdfStandardSecurityHandler.cs
@@ -267,7 +267,15 @@ public void DecryptObject(PdfObject value)
switch (value)
{
case PdfDictionary vDict:
- DecryptDictionary(vDict);
+ // this check is required for object-streams
+ // which may be "lazily" loaded and decrypted in PdfReferenceToCompressedObject.ReadValue
+ // alternatively we could populate a list of already decrypted objects,
+ // but this would probably require more memory
+ if (!vDict.AlreadyDecrypted)
+ {
+ DecryptDictionary(vDict);
+ vDict.AlreadyDecrypted = true;
+ }
break;
case PdfArray vArray:
DecryptArray(vArray);
diff --git a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf/PdfDictionary.cs b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf/PdfDictionary.cs
index 171d99dc..fd8d02a3 100644
--- a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf/PdfDictionary.cs
+++ b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf/PdfDictionary.cs
@@ -44,6 +44,12 @@ public class PdfDictionary : PdfObject, IEnumerable
+ /// Indicates whether this instance has already been decrypted
+ /// (only relevant if the document is protected).
+ ///
+ internal bool AlreadyDecrypted { get; set; }
+
///
/// Initializes a new instance of the class.
///
diff --git a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf/PdfDocument.cs b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf/PdfDocument.cs
index 6022d85d..3974c58b 100644
--- a/src/foundation/src/PDFsharp/src/PdfSharp/Pdf/PdfDocument.cs
+++ b/src/foundation/src/PDFsharp/src/PdfSharp/Pdf/PdfDocument.cs
@@ -120,6 +120,23 @@ void Initialize()
Trailer.CreateNewDocumentIDs();
}
+ /// <summary>
+ /// Gets a <see cref="Parser"/> for an imported document.
+ /// Returns null, if the document was not imported.
+ /// If this method is called multiple times for the same document,
+ /// the same parser-instance is returned each time.
+ /// </summary>
+ /// <returns>The parser-instance for imported documents or null when the document was not imported</returns>
+ internal Parser? GetParser()
+ {
+ if (_parser == null) // Create the parser on first request and cache it in _parser.
+ {
+ if (_lexer != null)
+ _parser = new Parser(this, _lexer);
+ }
+ return _parser;
+ }
+
//~PdfDocument()
//{
// Dispose(false);
@@ -813,6 +830,7 @@ public void Flatten()
// Imported Document.
internal Lexer? _lexer;
+ internal Parser? _parser;
internal DateTime _creation;