Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Check and handle circular references when processing XObject forms and fix #671 #676

Merged
merged 1 commit into from
Aug 5, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@
public class IntegrationDocumentTests
{
private static readonly Lazy<string> DocumentFolder = new Lazy<string>(() => Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents")));
private static readonly HashSet<string> _documentsToIgnore = new HashSet<string>()
{
"issue_671.pdf"
};

[Theory]
[MemberData(nameof(GetAllDocuments))]
Expand Down Expand Up @@ -101,7 +105,7 @@ public static IEnumerable<object[]> GetAllDocuments
var files = Directory.GetFiles(DocumentFolder.Value, "*.pdf");

// Return the shortname so we can see it in the test explorer.
return files.Select(x => new object[] { Path.GetFileName(x) });
return files.Where(x => !_documentsToIgnore.Any(i => x.EndsWith(i))).Select(x => new object[] { Path.GetFileName(x) });
}
}
}
Expand Down
37 changes: 37 additions & 0 deletions src/UglyToad.PdfPig.Tests/Integration/XObjectFormTests.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
namespace UglyToad.PdfPig.Tests.Integration
{
using UglyToad.PdfPig.Core;
using Xunit;

public class XObjectFormTests
{
[Fact]
public void CanReadDocumentWithoutStackOverflowIssue671()
{
using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("issue_671")))
{
var page = document.GetPage(1);
}
}

[Fact]
public void CanReadDocumentThrowsIssue671()
{
using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("issue_671"), ParsingOptions.LenientParsingOff))
{
var exception = Assert.Throws<PdfDocumentFormatException>(() => document.GetPage(1));
Assert.Contains("is referencing itself which can cause unexpected behaviour", exception.Message);
}
}

[Fact]
public void CanReadDocumentMOZILLA_3136_0()
{
// This document does not actually contain circular references
using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("MOZILLA-3136-0"), ParsingOptions.LenientParsingOff))
{
var page = document.GetPage(1);
}
}
}
}
32 changes: 30 additions & 2 deletions src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs
Original file line number Diff line number Diff line change
Expand Up @@ -486,15 +486,15 @@ public void ApplyXObject(NameToken xObjectName)
}
else if (subType.Equals(NameToken.Form))
{
ProcessFormXObject(xObjectStream);
ProcessFormXObject(xObjectStream, xObjectName);
}
else
{
throw new InvalidOperationException($"XObject encountered with unexpected SubType {subType}. {xObjectStream.StreamDictionary}.");
}
}

private void ProcessFormXObject(StreamToken formStream)
private void ProcessFormXObject(StreamToken formStream, NameToken xObjectName)
{
/*
* When a form XObject is invoked the following should happen:
Expand Down Expand Up @@ -603,6 +603,20 @@ private void ProcessFormXObject(StreamToken formStream)
// 3. We don't respect clipping currently.

// 4. Paint the objects.
bool hasCircularReference = HasFormXObjectCircularReference(formStream, xObjectName, operations);
if (hasCircularReference)
{
if (parsingOptions.UseLenientParsing)
{
operations = operations.Where(o => o is not InvokeNamedXObject xo || xo.Name != xObjectName).ToArray();
parsingOptions.Logger.Warn($"An XObject form named '{xObjectName}' is referencing itself which can cause unexpected behaviour. The self reference was removed from the operations before further processing.");
}
else
{
throw new PdfDocumentFormatException($"An XObject form named '{xObjectName}' is referencing itself which can cause unexpected behaviour.");
}
}

ProcessOperations(operations);

// 5. Restore saved state.
Expand All @@ -614,6 +628,20 @@ private void ProcessFormXObject(StreamToken formStream)
}
}

/// <summary>
/// Check for circular reference in the XObject form.
/// </summary>
/// <param name="formStream">The original form stream.</param>
/// <param name="xObjectName">The form's name.</param>
/// <param name="operations">The form operations parsed from original form stream.</param>
private bool HasFormXObjectCircularReference(StreamToken formStream, NameToken xObjectName, IReadOnlyList<IGraphicsStateOperation> operations)
{
return xObjectName != null
&& operations.OfType<InvokeNamedXObject>()?.Any(o => o.Name == xObjectName) == true // operations contain another form with same name
&& resourceStore.TryGetXObject(xObjectName, out var result)
&& result.Data.SequenceEqual(formStream.Data); // The form contained in the operations has identical data to current form
}

public void BeginSubpath()
{
if (CurrentPath == null)
Expand Down
Loading