[Hindi] NLP 05: Working with PDF Files | NLP | Python 3 | Natural Language Processing | 2019
Code:
# -*- coding: utf-8 -*-
"""NLP_Ex4.ipynb
Automatically generated by Colaboratory.
"""
!pip install PyPDF2
from google.colab import files
import PyPDF2
upload = files.upload()
!ls
# Read Sample1.pdf and print the text of its first page.
# The context manager closes the file even if PyPDF2 raises while parsing.
with open('Sample1.pdf', mode='rb') as myfile:
    pdf_reader = PyPDF2.PdfFileReader(myfile)
    # Bare expression: evaluates the page count; prints nothing when run
    # as a plain script.
    pdf_reader.numPages
    page_one = pdf_reader.getPage(0)
    # Extract once, while the file is still open, and reuse the result
    # instead of calling extractText() a second time just to print it.
    page_one_text = page_one.extractText()
print(page_one_text)
# Copy the first page of Sample1.pdf into a new single-page Sample2.pdf.
# Both handles are managed by `with`, so they are closed on error too.
with open('Sample1.pdf', mode='rb') as p:
    pdf_reader = PyPDF2.PdfFileReader(p)
    first_page = pdf_reader.getPage(0)
    pdf_writer = PyPDF2.PdfFileWriter()
    pdf_writer.addPage(first_page)
    # The source file stays open until the write has finished, matching
    # the original order (output written and closed before the input).
    # The output is opened only after the page was read successfully, so
    # a failed read does not create or truncate Sample2.pdf.
    with open('Sample2.pdf', mode='wb') as pdf_output:
        pdf_writer.write(pdf_output)
# Re-open the freshly written Sample2.pdf and print its first page as a check.
# The original never closed this handle; `with` releases it.
with open('Sample2.pdf', mode='rb') as file_new:
    pdf_reader = PyPDF2.PdfFileReader(file_new)
    # Bare expression: evaluates the page count; prints nothing when run
    # as a plain script.
    pdf_reader.numPages
    page_one = pdf_reader.getPage(0)
    print(page_one.extractText())
# Collect the extracted text of every page of Sample1.pdf, then print the
# pages one after another.
with open('Sample1.pdf', mode='rb') as p:
    pdf_reader = PyPDF2.PdfFileReader(p)
    # One list entry per page, in page order. Extraction happens while
    # the file is still open.
    pdf_text = [
        pdf_reader.getPage(q).extractText()
        for q in range(pdf_reader.numPages)
    ]

# Bare expression: evaluates the number of pages collected; prints
# nothing when run as a plain script.
len(pdf_text)

# NOTE(review): the original indentation was lost; both print('\n') calls
# are assumed to belong to the loop body, as page separators. Confirm.
for page in pdf_text:
    print(page)
    print('\n')
    print('\n')
0 Comments