-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathPDF.py
159 lines (146 loc) · 7.33 KB
/
PDF.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
from PyPDF2 import PdfFileReader, PdfFileWriter
import sys
import os
from pdf2image import convert_from_path
import shutil
class PDF:
    """Batch helper for common PDF chores built on PyPDF2 and pdf2image.

    The caller configures an instance through three attributes and then
    invokes one of the operation methods:

    infn    -- list of input PDF paths.
    params  -- operation-specific argument (documented on each method).
    message -- latest progress/status text; overwritten as work proceeds
               and set to an error text when an operation fails.

    Operations never raise for per-file failures; they report through
    ``message`` instead (``pdf_info`` is the exception and propagates).
    """

    # Generic failure text shared by most operations.
    _ERR_UNSUPPORTED = '出错: 如输入格式无误, 则不支持此文件'
    # Number of leading pages processed by cut_pdf when its test flag is 1.
    _TEST_PAGE_LIMIT = 10

    def __init__(self):
        self.infn = []
        self.params = ''
        self.message = ''

    @staticmethod
    def _display_name(path):
        """Return the file-name part of *path*, accepting '/' or '\\' separators."""
        return os.path.basename(path.replace('\\', '/'))

    @staticmethod
    def _strip_ext(path):
        """Return *path* without its extension (whatever the extension length)."""
        return os.path.splitext(path)[0]

    def pdf_info(self):
        """Return one summary string per input file.

        Each string holds the 1-based file ID, the page count, and the
        upper-right corner of the first page's media box (used as
        width x height). Errors opening or parsing a file propagate.
        """
        info = []
        for index, path in enumerate(self.infn, start=1):
            # The reader is lazy, so everything is read while the file is open.
            with open(path, 'rb') as src:
                reader = PdfFileReader(src)
                width, height = reader.getPage(0).mediaBox.upperRight
                info.append('ID:%d, 页数:%d, 宽×高:%d×%d ' % (index, reader.getNumPages(), width, height))
        return info

    def split_pdf_each(self):
        """Write every page of every input file as its own single-page PDF.

        Output goes to a sibling folder named ``<stem>单页拆分`` and files
        are named ``<stem>-<page>.pdf`` (1-based). ``params`` is unused.
        """
        for infn in self.infn:
            try:
                dir_, pdf_name = os.path.split(infn)
                stem = self._strip_ext(pdf_name)
                # os.path.join keeps a bare file name relative instead of
                # turning an empty directory into the filesystem root.
                out_dir = os.path.join(dir_, stem + '单页拆分')
                with open(infn, 'rb') as src:
                    pdf_input = PdfFileReader(src)
                    pages = pdf_input.getNumPages()
                    self.message = '正在拆分...'
                    os.makedirs(out_dir, exist_ok=True)
                    for i in range(pages):
                        pdf_output = PdfFileWriter()
                        pdf_output.addPage(pdf_input.getPage(i))
                        target = os.path.join(out_dir, stem + '-' + str(i + 1) + '.pdf')
                        with open(target, 'wb') as dst:
                            pdf_output.write(dst)
                        self.message = pdf_name + ': ' + str(i + 1) + '/' + str(pages)
                self.message = '完成'
            except Exception:
                self.message = self._ERR_UNSUPPORTED

    def split_pdf_parts(self):
        """Extract page ranges from every input file.

        ``params`` is a sequence of ``(first, last)`` pairs, 1-based and
        inclusive. Each range is written to ``<stem>部分拆分/<stem>-<first>-<last>.pdf``
        next to the input file.
        """
        for infn in self.infn:
            try:
                dir_, pdf_name = os.path.split(infn)
                stem = self._strip_ext(pdf_name)
                out_dir = os.path.join(dir_, stem + '部分拆分')
                with open(infn, 'rb') as src:
                    pdf_input = PdfFileReader(src)
                    self.message = '正在拆分...'
                    # enumerate gives the correct ordinal even for duplicate ranges.
                    for ordinal, part in enumerate(self.params, start=1):
                        pdf_output = PdfFileWriter()
                        for i in range(part[0] - 1, part[1]):
                            pdf_output.addPage(pdf_input.getPage(i))
                        os.makedirs(out_dir, exist_ok=True)
                        target = os.path.join(
                            out_dir, stem + '-' + str(part[0]) + '-' + str(part[1]) + '.pdf')
                        with open(target, 'wb') as dst:
                            pdf_output.write(dst)
                        self.message = '第%d部分已拆分' % ordinal
                self.message = '完成'
            except Exception:
                self.message = self._ERR_UNSUPPORTED

    def merge_pdf(self):
        """Concatenate all input files, in order, into the path given by ``params``."""
        # Inputs must stay open until the writer has serialised their pages,
        # so the handles are collected and closed together at the end.
        sources = []
        try:
            pdf_output = PdfFileWriter()
            self.message = '正在合并...'
            for infn in self.infn:
                self.message = infn
                pdf_name = self._display_name(infn)
                src = open(infn, 'rb')
                sources.append(src)
                pdf_input = PdfFileReader(src)
                pages = pdf_input.getNumPages()
                for i in range(pages):
                    pdf_output.addPage(pdf_input.getPage(i))
                    self.message = pdf_name + ': ' + str(i + 1) + '/' + str(pages)
            with open(self.params, 'wb') as dst:
                pdf_output.write(dst)
            self.message = '合并完成,合并文件位于第一个文件所在地'
        except Exception:
            self.message = self._ERR_UNSUPPORTED
        finally:
            for src in sources:
                src.close()

    def cut_pdf(self):
        """Crop the first input file and save it as ``<input stem>-cut.pdf``.

        ``params`` is ``(left, right, lower, upper, option, isTest)``:
        the new media box runs from ``(left, lower)`` to the original
        upper-right corner moved inwards by ``(right, upper)``.
        ``option`` selects which pages are cropped: ``'all'``, ``'odd'`` or
        ``'even'`` (1-based page numbers); other pages are copied as-is.
        When ``isTest`` equals 1 only the first 10 pages are written.
        """
        try:
            left, right, lower, upper, option, isTest = self.params
            self.message = '正在剪切'
            pdf_name = self._display_name(self.infn[0])
            with open(self.infn[0], 'rb') as src:
                pdf_input = PdfFileReader(src)
                pdf_output = PdfFileWriter()
                pages = pdf_input.getNumPages()
                if isTest == 1:
                    pages = min(self._TEST_PAGE_LIMIT, pages)
                for i in range(pages):
                    page = pdf_input.getPage(i)
                    page_no = i + 1
                    selected = (
                        option == 'all'
                        or (option == 'odd' and page_no % 2 == 1)
                        or (option == 'even' and page_no % 2 == 0)
                    )
                    if selected:
                        box = page.mediaBox
                        # Compute the target corner from the ORIGINAL box before
                        # mutating it: the corner properties share coordinates,
                        # so reading them after an assignment would apply the
                        # right/upper margins twice.
                        new_right = box.upperRight[0] - right
                        new_top = box.upperRight[1] - upper
                        box.lowerLeft = (left, lower)
                        box.upperRight = (new_right, new_top)
                    pdf_output.addPage(page)
                    self.message = pdf_name + ': ' + str(page_no) + '/' + str(pages)
                with open(self._strip_ext(self.infn[0]) + '-cut.pdf', 'wb') as dst:
                    pdf_output.write(dst)
            self.message = '剪切完成'
        except Exception:
            self.message = self._ERR_UNSUPPORTED

    def rotate_pdf(self):
        """Rotate pages of the first input file; save as ``<input stem>-rotate.pdf``.

        ``params`` is ``(rotation, isTest)``. ``rotation`` is the clockwise
        angle handed to PyPDF2. A falsy ``isTest`` processes every page;
        otherwise it is the number of leading pages to process (clamped to
        the document length).
        """
        try:
            pdf_name = self._display_name(self.infn[0])
            rotation, isTest = self.params
            self.message = '正在旋转'
            with open(self.infn[0], 'rb') as src:
                pdf_input = PdfFileReader(src)
                pdf_output = PdfFileWriter()
                total = pdf_input.getNumPages()
                # Clamp so a preview count larger than the document is harmless.
                pages = min(isTest, total) if isTest else total
                for i in range(pages):
                    page = pdf_input.getPage(i)
                    page.rotateClockwise(rotation)
                    pdf_output.addPage(page)
                    self.message = pdf_name + ': ' + str(i + 1) + '/' + str(pages)
                with open(self._strip_ext(self.infn[0]) + '-rotate.pdf', 'wb') as dst:
                    pdf_output.write(dst)
            self.message = '完成旋转'
        except Exception:
            self.message = '出错了,请检查输入格式是否正确(旋转角度为90的倍数)'

    def add_watermark(self):
        """Overlay pages of ``page-number.pdf`` onto every input file.

        The stamp file is looked up in the current working directory. Page
        ``i`` of the stamp is merged onto page ``i`` of the input; only
        ``min(input pages, stamp pages)`` pages are written. Output is
        ``<input stem>-number2.pdf``. ``params`` is unused.
        """
        error_text = '出错了,请检查输入格式是否正确(page-number.pdf文件要求和程序在同一目录)'
        self.message = '正在添加页码'
        try:
            # Opening the stamp inside the try lets a missing/unreadable stamp
            # file be reported through ``message`` instead of crashing.
            with open('page-number.pdf', 'rb') as stamp_file:
                water_pdf = PdfFileReader(stamp_file)
                water_pages = water_pdf.getNumPages()
                for infn in self.infn:
                    try:
                        pdf_name = self._display_name(infn)
                        with open(infn, 'rb') as src:
                            pdf_input = PdfFileReader(src)
                            pages = min(pdf_input.getNumPages(), water_pages)
                            pdf_output = PdfFileWriter()
                            for i in range(pages):
                                page = pdf_input.getPage(i)
                                page.mergePage(water_pdf.getPage(i))
                                pdf_output.addPage(page)
                                self.message = pdf_name + ': ' + str(i + 1) + '/' + str(pages)
                            with open(self._strip_ext(infn) + '-number2.pdf', 'wb') as dst:
                                pdf_output.write(dst)
                        self.message = '完成'
                    except Exception:
                        self.message = error_text
        except Exception:
            self.message = error_text

    def pdf2image(self):
        """Render every input file to PNG images, one per page.

        ``params`` is the DPI passed to pdf2image. Images are written to a
        sibling folder ``<stem>-images`` (any existing folder of that name
        is deleted first) and then renamed to ``<stem><suffix>``, where the
        suffix is the tail of the generated file name.
        """
        self.message = '正在转换,所需时间较长,请稍等'
        for infn in self.infn:
            try:
                dir_, pdf_name = os.path.split(infn)
                stem = self._strip_ext(pdf_name)
                path = os.path.join(dir_, stem + '-images')
                if os.path.exists(path):
                    shutil.rmtree(path)
                os.mkdir(path)
                convert_from_path(infn, dpi=self.params, output_folder=path, fmt='png', thread_count=4)
                generated = os.listdir(path)
                # Digits in the file count; the kept tail is 5 + num characters,
                # presumably '-' + page digits + '.png' -- verify against the
                # pdf2image naming scheme in use.
                num = len(str(len(generated)))
                for fn in generated:
                    os.rename(os.path.join(path, fn), os.path.join(path, stem + fn[-(5 + num):]))
                self.message = '完成'
            except Exception:
                self.message = self._ERR_UNSUPPORTED
if __name__ == '__main__':
    # Manual smoke test: render the given PDFs to PNG at 100 DPI.
    # Paths come from the command line; with no arguments the original
    # developer-machine sample path is used as a fallback.
    pdf = PDF()
    pdf.infn = sys.argv[1:] or ['E:/github/pdfdo/page-number.pdf']
    pdf.params = 100
    pdf.pdf2image()
    print(pdf.message)