I have some code that takes 3 different PDF byte arrays and merges them. This code works great. The issue (some people) are having is that each PDF is considered to be a full page (if printed) even if there is only say 4 inches of content on it, thus leaving 7 inches of white space vertically. Then the middle document gets put in and may or may not have vertical white space at the end of it. Then the footer gets put on its own page as well.
Here is the code:
byte[] Bytes = rv.LocalReport.Render("PDF", null, out MimeType, out Encoding, out Extension, out StreamIDs, out Warnings);
List<byte[]> MergeSets = // This is filled prior to this code
// Append any other pages to this primary letter
if (MergeSets.Count > 0) {
MemoryStream ms = new MemoryStream();
Document document = new Document();
PdfCopy copy = new PdfCopy(document, ms);
document.Open();
PdfImportedPage page;
PdfReader reader = new PdfReader(Bytes); // read the generated primary Letter
int pages = reader.NumberOfPages;
for (int i = 0; i < pages; ) {
page = copy.GetImportedPage(reader, ++i);
copy.AddPage(page);
} // foreach of the pages in the Cover Letter
// Now append the merge sets
foreach (byte[] ba in MergeSets) {
reader = new PdfReader(ba);
pages = reader.NumberOfPages;
for (int i = 0; i < pages; ) {
page = copy.GetImportedPage(reader, ++i);
copy.AddPage(page);
} // foreach of the pages in the current merge set
} // foreach of the sets of data
document.Close();
ServerSaved = SaveGeneratedLetter(ms.GetBuffer(), DateTime.Now.Year, hl.LetterName, SaveName);
} // if there is anything to merge
Is there a way when I am merging each page to clip/remove/erase the vertical white space at the end of each pdf so it appears as one seamless document?
UPDATE: Here are some sample .pdf files I am trying to merge.
header, body, footer
UPDATE 2: USING THE ANSWER:
I have converted @mkl's code to C# and here it is.
The tool class:
public class PdfVeryDenseMergeTool {
private Rectangle PageSize;
private float TopMargin;
private float BottomMargin;
private float Gap;
private Document Document = null;
private PdfWriter Writer = null;
private float YPosition = 0;
public PdfVeryDenseMergeTool(Rectangle size, float top, float bottom, float gap) {
this.PageSize = size;
this.TopMargin = top;
this.BottomMargin = bottom;
this.Gap = gap;
} // PdfVeryDenseMergeTool
public void Merge(MemoryStream outputStream, List<PdfReader> inputs) {
try {
this.OpenDocument(outputStream);
foreach (PdfReader reader in inputs) {
this.Merge(reader);
} // foreach of the PDF files to merge
} finally {
this.CloseDocument();
} // try-catch-finally
} // Merge
public void OpenDocument(MemoryStream outputStream) {
this.Document = new Document(PageSize, 36, 36, this.TopMargin, this.BottomMargin);
this.Writer = PdfWriter.GetInstance(Document, outputStream);
this.Document.Open();
this.NewPage();
} // OpenDocument
public void CloseDocument() {
try {
this.Document.Close();
} finally {
this.Document = null;
this.Writer = null;
this.YPosition = 0;
} // try-finally
} // CloseDocument
public void NewPage() {
this.Document.NewPage();
this.YPosition = PageSize.GetTop(this.TopMargin);
} // Merge
public void Merge(PdfReader reader) {
PdfReaderContentParser parser = new PdfReaderContentParser(reader);
for (int pageIndex = 1; pageIndex <= reader.NumberOfPages; pageIndex++) {
this.Merge(reader, parser, pageIndex);
} // foreach of the pages of the current PDF
} // Merge
public void Merge(PdfReader reader, PdfReaderContentParser parser, int pageIndex) {
PdfImportedPage importedPage = Writer.GetImportedPage(reader, pageIndex);
PdfContentByte directContent = Writer.DirectContent;
PageVerticalAnalyzer finder = parser.ProcessContent(pageIndex, new PageVerticalAnalyzer());
if (finder.VerticalFlips.Count < 2)
return;
Rectangle pageSizeToImport = reader.GetPageSize(pageIndex);
int startFlip = finder.VerticalFlips.Count - 1;
bool first = true;
while (startFlip > 0) {
if (!first)
this.NewPage();
float freeSpace = this.YPosition - PageSize.GetBottom(BottomMargin);
int endFlip = startFlip + 1;
while ((endFlip > 1) && (finder.VerticalFlips[startFlip] - finder.VerticalFlips[endFlip - 2] < freeSpace))
endFlip -= 2;
if (endFlip < startFlip) {
float height = finder.VerticalFlips[startFlip] - finder.VerticalFlips[endFlip];
directContent.SaveState();
directContent.Rectangle(0, this.YPosition - height, pageSizeToImport.Width, height);
directContent.Clip();
directContent.NewPath();
this.Writer.DirectContent.AddTemplate(importedPage, 0, this.YPosition - (finder.VerticalFlips[startFlip] - pageSizeToImport.Bottom));
directContent.RestoreState();
this.YPosition -= height + this.Gap;
startFlip = endFlip - 1;
} else if (!first) {
throw new ArgumentException(string.Format("Page {0} content too large", pageIndex));
} // if
first = false;
} // while
} // Merge
} // PdfVeryDenseMergeTool
The RenderListener class:
UPDATE 3: FIXED 1 LINE OF CODE AND IT WORKS: See comment in code
public class PageVerticalAnalyzer : IRenderListener {
public PageVerticalAnalyzer() { }
public List<float> VerticalFlips = new List<float>();
public void AddVerticalUseSection(float from, float to) {
if (to < from) {
float temp = to;
to = from;
from = temp;
}
int i = 0;
int j = 0;
for (i = 0; i < VerticalFlips.Count; i++) {
float flip = VerticalFlips[i];
if (flip < from)
continue;
for (j = i; j < VerticalFlips.Count; j++) {
flip = VerticalFlips[j];
if (flip < to)
continue;
break;
}
break;
} // foreach of the vertical flips
bool fromOutsideInterval = i % 2 == 0;
bool toOutsideInterval = j % 2 == 0;
while (j-- > i)
VerticalFlips.RemoveAt(j); // This was the problem line with just .Remove(j)
if (toOutsideInterval)
VerticalFlips.Insert(i, to);
if (fromOutsideInterval)
VerticalFlips.Insert(i, from);
} // AddVerticalUseSection
public void BeginTextBlock() { /* Do nothing */ }
public void EndTextBlock() { /* Do nothing */ }
public void RenderImage(ImageRenderInfo renderInfo) {
Matrix ctm = renderInfo.GetImageCTM();
List<float> YCoords = new List<float>(4) { 0, 0, 0, 0 };
for (int x = 0; x < 2; x++) {
for (int y = 0; y < 2; y++) {
Vector corner = new Vector(x, y, 1).Cross(ctm);
YCoords[2 * x + y] = corner[Vector.I2];
}
}
YCoords.Sort();
AddVerticalUseSection(YCoords[0], YCoords[3]);
} // RenderImage
public void RenderText(TextRenderInfo renderInfo) {
LineSegment ascentLine = renderInfo.GetAscentLine();
LineSegment descentLine = renderInfo.GetDescentLine();
List<float> YCoords = new List<float>(4) {
ascentLine.GetStartPoint()[Vector.I2],
ascentLine.GetEndPoint()[Vector.I2],
descentLine.GetStartPoint()[Vector.I2],
descentLine.GetEndPoint()[Vector.I2],
};
YCoords.Sort();
AddVerticalUseSection(YCoords[0], YCoords[3]);
} // RenderText
} // PageVericalAnalyzer
Code to gather files and run the tool:
public void TestMergeDocuments() {
PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(iTextSharp.text.PageSize.A4, 18, 18, 10);
List<byte[]> Files = new List<byte[]>();
// Code to load each of the 3 files I need into this byte array list
using (MemoryStream ms = new MemoryStream()) {
List<PdfReader> files = new List<PdfReader>();
foreach (byte[] ba in Files) {
files.Add(new PdfReader(ba));
} // foreach of the sets of data
tool.Merge(ms, files);
// Save the file using: ms.GetBuffer()
} // using the memory stream
} // TestMergeDocuments
The following sample tool has been implemented along the ideas of the tool PdfDenseMergeTool
from this answer which the OP has commented to be SO close to what [he] NEEDs. Just like PdfDenseMergeTool
this tool here is implemented in Java/iText which I'm more at home with than C#/iTextSharp. As the OP has already translated PdfDenseMergeTool
to C#/iTextSharp, translating this tool here also should not be too great a problem.
This tool similarly to PdfDenseMergeTool
takes the page contents of pages from a number of PdfReader
instances and tries to merge them densely, i.e. putting contents of multiple source pages onto a single target page if there is enough free space to do so. In contrast to that earlier tool, this tool even splits source page contents to allow for an even denser merge.
Just like that other tool the PdfVeryDenseMergeTool
does not take vector graphics into account because the iText(Sharp) parsing API does only forward text and bitmap images
The PdfVeryDenseMergeTool
splits source pages which do not completely fit onto a target page at a horizontal line which is not intersected by the bounding boxes of text glyphs or bitmap graphics.
The tool class:
public class PdfVeryDenseMergeTool { public PdfVeryDenseMergeTool(Rectangle size, float top, float bottom, float gap) { this.pageSize = size; this.topMargin = top; this.bottomMargin = bottom; this.gap = gap; } public void merge(OutputStream outputStream, Iterable<PdfReader> inputs) throws DocumentException, IOException { try { openDocument(outputStream); for (PdfReader reader: inputs) { merge(reader); } } finally { closeDocument(); } } void openDocument(OutputStream outputStream) throws DocumentException { final Document document = new Document(pageSize, 36, 36, topMargin, bottomMargin); final PdfWriter writer = PdfWriter.getInstance(document, outputStream); document.open(); this.document = document; this.writer = writer; newPage(); } void closeDocument() { try { document.close(); } finally { this.document = null; this.writer = null; this.yPosition = 0; } } void newPage() { document.newPage(); yPosition = pageSize.getTop(topMargin); } void merge(PdfReader reader) throws IOException { PdfReaderContentParser parser = new PdfReaderContentParser(reader); for (int page = 1; page <= reader.getNumberOfPages(); page++) { merge(reader, parser, page); } } void merge(PdfReader reader, PdfReaderContentParser parser, int page) throws IOException { PdfImportedPage importedPage = writer.getImportedPage(reader, page); PdfContentByte directContent = writer.getDirectContent(); PageVerticalAnalyzer finder = parser.processContent(page, new PageVerticalAnalyzer()); if (finder.verticalFlips.size() < 2) return; Rectangle pageSizeToImport = reader.getPageSize(page); int startFlip = finder.verticalFlips.size() - 1; boolean first = true; while (startFlip > 0) { if (!first) newPage(); float freeSpace = yPosition - pageSize.getBottom(bottomMargin); int endFlip = startFlip + 1; while ((endFlip > 1) && (finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip - 2) < freeSpace)) endFlip -=2; if (endFlip < startFlip) { float height = finder.verticalFlips.get(startFlip) - finder.verticalFlips.get(endFlip); directContent.saveState(); directContent.rectangle(0, yPosition - height, pageSizeToImport.getWidth(), height); directContent.clip(); directContent.newPath(); writer.getDirectContent().addTemplate(importedPage, 0, yPosition - (finder.verticalFlips.get(startFlip) - pageSizeToImport.getBottom())); directContent.restoreState(); yPosition -= height + gap; startFlip = endFlip - 1; } else if (!first) throw new IllegalArgumentException(String.format("Page %s content sections too large.", page)); first = false; } } Document document = null; PdfWriter writer = null; float yPosition = 0; final Rectangle pageSize; final float topMargin; final float bottomMargin; final float gap; }
(PdfVeryDenseMergeTool.java)
This tool makes use of a custom RenderListener
for use with the iText parser API:
public class PageVerticalAnalyzer implements RenderListener { @Override public void beginTextBlock() { } @Override public void endTextBlock() { } /* * @see RenderListener#renderText(TextRenderInfo) */ @Override public void renderText(TextRenderInfo renderInfo) { LineSegment ascentLine = renderInfo.getAscentLine(); LineSegment descentLine = renderInfo.getDescentLine(); float[] yCoords = new float[]{ ascentLine.getStartPoint().get(Vector.I2), ascentLine.getEndPoint().get(Vector.I2), descentLine.getStartPoint().get(Vector.I2), descentLine.getEndPoint().get(Vector.I2) }; Arrays.sort(yCoords); addVerticalUseSection(yCoords[0], yCoords[3]); } /* * @see RenderListener#renderImage(ImageRenderInfo) */ @Override public void renderImage(ImageRenderInfo renderInfo) { Matrix ctm = renderInfo.getImageCTM(); float[] yCoords = new float[4]; for (int x=0; x < 2; x++) for (int y=0; y < 2; y++) { Vector corner = new Vector(x, y, 1).cross(ctm); yCoords[2*x+y] = corner.get(Vector.I2); } Arrays.sort(yCoords); addVerticalUseSection(yCoords[0], yCoords[3]); } /** * This method marks the given interval as used. */ void addVerticalUseSection(float from, float to) { if (to < from) { float temp = to; to = from; from = temp; } int i=0, j=0; for (; i<verticalFlips.size(); i++) { float flip = verticalFlips.get(i); if (flip < from) continue; for (j=i; j<verticalFlips.size(); j++) { flip = verticalFlips.get(j); if (flip < to) continue; break; } break; } boolean fromOutsideInterval = i%2==0; boolean toOutsideInterval = j%2==0; while (j-- > i) verticalFlips.remove(j); if (toOutsideInterval) verticalFlips.add(i, to); if (fromOutsideInterval) verticalFlips.add(i, from); } final List<Float> verticalFlips = new ArrayList<Float>(); }
(PageVerticalAnalyzer.java)
It is used like this:
PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(PageSize.A4, 18, 18, 5); tool.merge(output, inputs);
(VeryDenseMerging.java)
Applied to the OP's sample documents
Header.pdf
Body.pdf
Footer.pdf
it generates
If one defines the target document page size to be A5 landscape:
PdfVeryDenseMergeTool tool = new PdfVeryDenseMergeTool(new RectangleReadOnly(595,421), 18, 18, 5); tool.merge(output, inputs);
(VeryDenseMerging.java)
it generates this:
Beware! This is only a proof of concept and it does not consider all possibilities. E.g. the case of source or target pages with a non-trivial Rotate value is not properly handled. Thus, it is not ready for production use yet.
The current iText development version towards 5.5.6 enhances the parser functionality to also signal vector graphics. Thus, I extended the PageVerticalAnalyzer
to make use of this:
public class PageVerticalAnalyzer implements ExtRenderListener { @Override public void beginTextBlock() { } @Override public void endTextBlock() { } @Override public void clipPath(int rule) { } ... static class SubPathSection { public SubPathSection(float x, float y, Matrix m) { float effectiveY = getTransformedY(x, y, m); pathFromY = effectiveY; pathToY = effectiveY; } void extendTo(float x, float y, Matrix m) { float effectiveY = getTransformedY(x, y, m); if (effectiveY < pathFromY) pathFromY = effectiveY; else if (effectiveY > pathToY) pathToY = effectiveY; } float getTransformedY(float x, float y, Matrix m) { return new Vector(x, y, 1).cross(m).get(Vector.I2); } float getFromY() { return pathFromY; } float getToY() { return pathToY; } private float pathFromY; private float pathToY; } /* * Beware: The implementation is not correct as it includes the control points of curves * which may be far outside the actual curve. * * @see ExtRenderListener#modifyPath(PathConstructionRenderInfo) */ @Override public void modifyPath(PathConstructionRenderInfo renderInfo) { Matrix ctm = renderInfo.getCtm(); List<Float> segmentData = renderInfo.getSegmentData(); switch (renderInfo.getOperation()) { case PathConstructionRenderInfo.MOVETO: subPath = null; case PathConstructionRenderInfo.LINETO: case PathConstructionRenderInfo.CURVE_123: case PathConstructionRenderInfo.CURVE_13: case PathConstructionRenderInfo.CURVE_23: for (int i = 0; i < segmentData.size()-1; i+=2) { if (subPath == null) { subPath = new SubPathSection(segmentData.get(i), segmentData.get(i+1), ctm); path.add(subPath); } else subPath.extendTo(segmentData.get(i), segmentData.get(i+1), ctm); } break; case PathConstructionRenderInfo.RECT: float x = segmentData.get(0); float y = segmentData.get(1); float w = segmentData.get(2); float h = segmentData.get(3); SubPathSection section = new SubPathSection(x, y, ctm); section.extendTo(x+w, y, ctm); section.extendTo(x, y+h, ctm); section.extendTo(x+w, y+h, ctm); path.add(section); case PathConstructionRenderInfo.CLOSE: subPath = null; break; default: } } /* * @see ExtRenderListener#renderPath(PathPaintingRenderInfo) */ @Override public Path renderPath(PathPaintingRenderInfo renderInfo) { if (renderInfo.getOperation() != PathPaintingRenderInfo.NO_OP) { for (SubPathSection section : path) addVerticalUseSection(section.getFromY(), section.getToY()); } path.clear(); subPath = null; return null; } List<SubPathSection> path = new ArrayList<SubPathSection>(); SubPathSection subPath = null; ... }
(PageVerticalAnalyzer.java)
A simple test (VeryDenseMerging.java method testMergeOnlyGraphics
) merges these files
into this:
But once again beware: this is a mere proof of concept. Especially modifyPath()
needs to be improved, the implementation is not correct as it includes the control points of curves which may be far outside the actual curve.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With