-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathpizzgloss.py
56 lines (51 loc) · 1.28 KB
/
pizzgloss.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
import re
# Input files: the first is copied to stdout verbatim before the converted
# text; the second holds the Persian text that is converted line by line.
TEI_HEADER_PATH = 'pizzi-tei.txt'
PERSIAN_TEXT_PATH = 'pizzi-persian-nodouble.txt'

# Invisible characters removed (at most once each, in this order) from the
# start of a line: ZERO WIDTH NON-JOINER, BOM, RIGHT-TO-LEFT MARK.
_LEADING_MARKS = ('\u200c', '\ufeff', '\u200f')

# Compiled once; raw strings avoid invalid-escape warnings for \[ \] \s.
_TRAILING_RLM = re.compile('\u200f$')
_TRAILING_ZWNJ = re.compile('\u200c$')
_HEAD_LINE = re.compile(r'^[ ]*[A-Z]')
_LATIN_LETTER = re.compile(r'[a-zA-Z]')
_OPEN_BRACKET = re.compile(r'\[\s*')
_CLOSE_BRACKET = re.compile(r'\s*\]')


def tei_body_lines(lines):
    """Convert raw text lines into TEI markup chunks.

    Each yielded string is meant to be written with ``print`` (i.e. followed
    by a newline).  Per input line:

    * a line containing ``Pizzi`` closes the previous reading ``<div>`` (if
      any) and opens a new numbered one, restarting verse numbering;
    * a line starting with an upper-case ASCII letter becomes a ``<head>``;
    * any other line containing ASCII letters is emitted unchanged and
      restarts verse numbering;
    * empty lines are skipped;
    * everything else becomes a numbered ``<l>`` element, wrapped in
      ``<del>`` markers when the line contained square brackets.

    NOTE(review): the original file's indentation was lost; the nesting used
    here (whitespace strip and head check at loop level) is reconstructed.

    Args:
        lines: iterable of strings, typically an open text file.

    Yields:
        str: one output chunk per emitted element.
    """
    reading_no = 0
    verse_no = 0
    for raw in lines:
        # Leading clean-up: spaces first, then each invisible mark once.
        text = raw.lstrip(' ')
        for mark in _LEADING_MARKS:
            if text.startswith(mark):
                text = text[len(mark):]
        # `$` also matches just before the line's final newline.
        text = _TRAILING_RLM.sub('', text)

        if 'Pizzi' in text:
            verse_no = 0
            if reading_no > 0:
                yield '</div>'
            reading_no += 1
            yield ('<div type="textpart" subtype="reading" n="'
                   + str(reading_no) + '">')

        text = text.rstrip(' \n')
        if _HEAD_LINE.search(text):
            yield '<head>' + text + '</head>'
            continue

        # Marks that were hidden behind trailing whitespace are now exposed.
        text = _TRAILING_ZWNJ.sub('', text)
        text = _TRAILING_RLM.sub('', text)
        # Replace Arabic yeh with Persian yeh.
        text = text.replace('ي', 'ی')

        if '[' in text or ']' in text:
            text = _OPEN_BRACKET.sub('', text)
            text = _CLOSE_BRACKET.sub('', text)
            del_open, del_close = '<del>', '</del>'
        else:
            # Reset per line; the original assigned an unused name here, so
            # the <del> markers leaked onto every following verse.
            del_open = del_close = ''

        if _LATIN_LETTER.search(text):
            verse_no = 0
            yield text
            continue
        if not text:
            continue

        verse_no += 1
        yield '\n'.join(
            ('<l n="' + str(verse_no) + '">', del_open, text, del_close, '</l>')
        )


def main(header_path=TEI_HEADER_PATH, text_path=PERSIAN_TEXT_PATH):
    """Print the header file verbatim, then the converted text and closing tags.

    Args:
        header_path: file whose contents are copied to stdout unchanged.
        text_path: file whose lines are converted by ``tei_body_lines``.

    Raises:
        OSError: if either file cannot be opened.
    """
    # Explicit UTF-8 so decoding does not depend on the platform locale.
    with open(header_path, 'r', encoding='utf-8') as header:
        for line in header:
            print(line, end='')
    with open(text_path, 'r', encoding='utf-8') as source:
        for chunk in tei_body_lines(source):
            print(chunk)
    # Printed unconditionally, exactly as the original script did.
    print('</div>\n</body>\n</text>\n</TEI>')


if __name__ == '__main__':
    main()