willsonlincake 发表于 2022-4-14 13:51:56

PDF转文本

import pdftotext

# Load your PDF. The file is opened in binary mode and the open file
# object is handed to pdftotext.PDF.
with open("lorem_ipsum.pdf", "rb") as f:
    pdf = pdftotext.PDF(f)

# If it's password-protected, pass the password as the second argument.
# NOTE: this rebinds `pdf`, so every statement below operates on
# secure.pdf rather than lorem_ipsum.pdf.
with open("secure.pdf", "rb") as f:
    pdf = pdftotext.PDF(f, "secret")

# How many pages?
print(len(pdf))

# Iterate over all the pages; each item is printed as that page's text.
for page in pdf:
    print(page)

# Read some individual pages by zero-based index: the first page, then
# the second.
# NOTE(review): presumably raises IndexError when the document has fewer
# than two pages -- confirm against the pdftotext documentation.
print(pdf[0])
print(pdf[1])

# Read all the text into one string, with a blank line between pages.
print("\n\n".join(pdf))
页: [1]
查看完整版本: PDF转文本