diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/issue_671.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/issue_671.pdf new file mode 100644 index 000000000..60e4a120d Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/Documents/issue_671.pdf differ diff --git a/src/UglyToad.PdfPig.Tests/Integration/IntegrationDocumentTests.cs b/src/UglyToad.PdfPig.Tests/Integration/IntegrationDocumentTests.cs index 6af240a36..a66df5695 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/IntegrationDocumentTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/IntegrationDocumentTests.cs @@ -9,6 +9,10 @@ public class IntegrationDocumentTests { private static readonly Lazy DocumentFolder = new Lazy(() => Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents"))); + private static readonly HashSet _documentsToIgnore = new HashSet() + { + "issue_671.pdf" + }; [Theory] [MemberData(nameof(GetAllDocuments))] @@ -101,7 +105,7 @@ public static IEnumerable GetAllDocuments var files = Directory.GetFiles(DocumentFolder.Value, "*.pdf"); // Return the shortname so we can see it in the test explorer. - return files.Select(x => new object[] { Path.GetFileName(x) }); + return files.Where(x => !_documentsToIgnore.Any(i => x.EndsWith(i))).Select(x => new object[] { Path.GetFileName(x) }); } } } diff --git a/src/UglyToad.PdfPig.Tests/Integration/XObjectFormTests.cs b/src/UglyToad.PdfPig.Tests/Integration/XObjectFormTests.cs new file mode 100644 index 000000000..fb0a0406c --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Integration/XObjectFormTests.cs @@ -0,0 +1,37 @@ +namespace UglyToad.PdfPig.Tests.Integration +{ + using UglyToad.PdfPig.Core; + using Xunit; + + public class XObjectFormTests + { + [Fact] + public void CanReadDocumentWithoutStackOverflowIssue671() + { + using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("issue_671"))) + { + var page = document.GetPage(1); + } + } + + [Fact] + public void CanReadDocumentThrowsIssue671() + { + using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("issue_671"), ParsingOptions.LenientParsingOff)) + { + var exception = Assert.Throws(() => document.GetPage(1)); + Assert.Contains("is referencing itself which can cause unexpected behaviour", exception.Message); + } + } + + [Fact] + public void CanReadDocumentMOZILLA_3136_0() + { + // This document does not actually contain circular references + using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("MOZILLA-3136-0"), ParsingOptions.LenientParsingOff)) + { + var page = document.GetPage(1); + } + } + } +} diff --git a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs index dcc9be2d8..a33b02186 100644 --- a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs @@ -486,7 +486,7 @@ public void ApplyXObject(NameToken xObjectName) } else if (subType.Equals(NameToken.Form)) { - ProcessFormXObject(xObjectStream); + ProcessFormXObject(xObjectStream, xObjectName); } else { @@ -494,7 +494,7 @@ public void ApplyXObject(NameToken xObjectName) } } - private void ProcessFormXObject(StreamToken formStream) + private void ProcessFormXObject(StreamToken formStream, NameToken xObjectName) { /* * When a form XObject is invoked the following should happen: @@ -603,6 +603,20 @@ private void ProcessFormXObject(StreamToken formStream) // 3. We don't respect clipping currently. // 4. Paint the objects. + bool hasCircularReference = HasFormXObjectCircularReference(formStream, xObjectName, operations); + if (hasCircularReference) + { + if (parsingOptions.UseLenientParsing) + { + operations = operations.Where(o => o is not InvokeNamedXObject xo || xo.Name != xObjectName).ToArray(); + parsingOptions.Logger.Warn($"An XObject form named '{xObjectName}' is referencing itself which can cause unexpected behaviour. The self reference was removed from the operations before further processing."); + } + else + { + throw new PdfDocumentFormatException($"An XObject form named '{xObjectName}' is referencing itself which can cause unexpected behaviour."); + } + } + ProcessOperations(operations); // 5. Restore saved state. @@ -614,6 +628,20 @@ private void ProcessFormXObject(StreamToken formStream) } } + /// + /// Check for circular reference in the XObject form. + /// + /// The original form stream. + /// The form's name. + /// The form operations parsed from original form stream. + private bool HasFormXObjectCircularReference(StreamToken formStream, NameToken xObjectName, IReadOnlyList operations) + { + return xObjectName != null + && operations.OfType()?.Any(o => o.Name == xObjectName) == true // operations contain another form with same name + && resourceStore.TryGetXObject(xObjectName, out var result) + && result.Data.SequenceEqual(formStream.Data); // The form contained in the operations has identical data to current form + } + public void BeginSubpath() { if (CurrentPath == null)