# This Section imports the necessary classes from the PyPDF2 library
from PyPDF2 import PdfFileReader, PdfFileWriter
from PyPDF2.generic import ContentStream, NameObject, TextStringObject
from PyPDF2.utils import b_
# Text the watermark starts with. The comparison below is case-sensitive, so
# this must be spelled exactly as the text is stored in the PDF (for example
# "SAMPLE" versus "Sample").
wm_text = "Sample"
# Replacement for the watermark text; an empty string removes it entirely.
replace_with = ""

# Load the PDF and prepare a writer for the cleaned copy.
reader = PdfFileReader("input.pdf")
writer = PdfFileWriter()

for page in reader.pages:
    # /Contents is optional on a page; pages without it are copied unchanged.
    if "/Contents" in page:
        content_object = page["/Contents"].getObject()
        content = ContentStream(content_object, reader)

        # Walk every (operands, operator) pair of the page's content stream.
        for operands, operator in content.operations:
            # Adapt this operator to the PDF at hand: some files draw their
            # text with "Tj" (single string) instead of "TJ" (array).
            if operator != b_("TJ") or not operands:
                continue
            fragments = operands[0]
            if not fragments:
                # Empty TJ array: there is no first element to inspect.
                continue
            text = fragments[0]
            if isinstance(text, TextStringObject) and text.startswith(wm_text):
                # TJ takes an array operand, so replace the array's contents
                # in place rather than swapping the array for a bare string.
                fragments[:] = [TextStringObject(replace_with)]

        # Set the modified content stream as the page's content object.
        page[NameObject("/Contents")] = content

    # Add the page to the output.
    writer.addPage(page)

# Write the cleaned document.
with open("output.pdf", "wb") as fh:
    writer.write(fh)
To remove watermarks from multiple PDFs, close any open PDFs and choose Tools > Edit PDF > Watermark > Remove.
Go to the Tools tab and select Edit PDF. The next set of options includes Watermark; click it to reach the Add, Edit, and Remove options. Select Remove to delete the watermark text or image. You can also remove a watermark from multiple files by opening Acrobat.
Step 1: Drag your watermarked PDF into PDFelement; Step 2: Select Tool and then choose Add or Edit Watermark; Step 3: Click Edit Watermark and hit the Delete button on the right side; Step 4: Choose Yes to remove all watermarks from the PDF.
Using the code from the question, here is a function that works in Python 3.
def remove_watermark(wm_text: str, inputFile: str, outputFile: str) -> None:
    """Copy a PDF, blanking every ``Tj`` text run that starts with *wm_text*.

    Each page's content stream is scanned for ``Tj`` (show text) operators.
    When the operator's string operand starts with ``wm_text`` (case-sensitive
    match), the operand is replaced with an empty string. Pages are then
    written, in order, to ``outputFile``.

    Args:
        wm_text: Prefix of the watermark text to remove.
        inputFile: Path of the PDF to read.
        outputFile: Path the cleaned PDF is written to.
    """
    from PyPDF4 import PdfFileReader, PdfFileWriter
    from PyPDF4.pdf import ContentStream
    from PyPDF4.generic import TextStringObject, NameObject
    from PyPDF4.utils import b_

    with open(inputFile, "rb") as f:
        source = PdfFileReader(f)
        output = PdfFileWriter()

        for page_number in range(source.getNumPages()):
            page = source.getPage(page_number)

            # /Contents is optional on a page; such pages are copied as-is.
            if "/Contents" in page:
                content_object = page["/Contents"].getObject()
                content = ContentStream(content_object, source)

                for operands, operator in content.operations:
                    if operator != b_("Tj") or not operands:
                        continue
                    text = operands[0]
                    if isinstance(text, str) and text.startswith(wm_text):
                        operands[0] = TextStringObject('')

                # Install the edited stream as the page's content.
                page[NameObject('/Contents')] = content

            output.addPage(page)

        # Write while the input file is still open: the pages added above
        # still reference the reader, which reads from ``f``.
        with open(outputFile, "wb") as outputStream:
            output.write(outputStream)
# Example invocation: set the watermark text and the input/output paths,
# then run the removal.
wm_text = "wm_text"
inputFile = "input.pdf"
outputFile = "output.pdf"
remove_watermark(
    wm_text=wm_text,
    inputFile=inputFile,
    outputFile=outputFile,
)
If you love us, you can donate via PayPal or buy us a coffee so we can keep maintaining and growing. Thank you!
Donate Us With