OSDN Git Service

5367b9e56e621205711484d7530a174d70987579
[kp123/kp123.git] / data / merge.py
1 #!/usr/bin/env python3
2
3 import sys
4 import pdb
5
# Two positional arguments are required: sys.argv[1] is the base list and
# sys.argv[2] is the list whose new entries get merged into it.
if len(sys.argv) < 3:
    # A usage error is reported on stderr and with a non-zero status so that
    # calling shells/scripts can tell the merge did not run.
    print("usage: %s base_file extra_file" % sys.argv[0], file=sys.stderr)
    sys.exit(1)
9
def load_file(name):
    """Read the text file *name* and return its usable lines as a list.

    Lines shorter than three characters (line terminator included) are
    skipped.  A byte-order mark at the start of a line is removed, whether
    it shows up as the single character U+FEFF or as the two characters
    U+00FE U+00FF.  Line terminators are kept on the returned strings.

    The file is opened in text mode with the platform default encoding.

    A few diagnostics are printed to stdout while reading: lines that have
    'A' at both index 1 and index 3 (together with the code point of their
    first character), and any line from which a U+FEFF mark was stripped.
    """
    lines = []
    # The context manager closes the file even if reading a line raises.
    with open(name, 'r') as fi:
        for line in fi:
            if len(line) < 3:
                continue
            # Diagnostic only.  The explicit length guard is required: the
            # check above guarantees indices 0..2, not index 3.
            if len(line) > 3 and line[1] == 'A' and line[3] == 'A':
                print(line)
                print(ord(line[0]))
            if ord(line[0]) == 0xFEFF:
                line = line[1:]
                print(line)
            if line[0:2] == "\xFE\xFF":
                line = line[2:]
            lines.append(line)
    return lines
27
# Load both inputs in command-line order (l1 from argv[1], l2 from argv[2])
# and report how many lines each one yielded.
l1, l2 = (load_file(path) for path in sys.argv[1:3])
for label, entries in (("l1", l1), ("l2", l2)):
    print(label, len(entries))
32
def strokes_len(s):
    """Count the title-case characters in the leading field of *s*.

    The field starts at index 2 and ends just before the first "|", or at
    the end of the string when *s* contains no "|".  Each character for
    which ``str.istitle`` is true contributes one to the result.
    """
    head, _, _ = s.partition("|")
    return sum(1 for ch in head[2:] if ch.istitle())
42
# Merge l2 into l1.  Entries are keyed by the character at index 1: an entry
# from l2 is appended only when no entry already in l1 -- including entries
# appended earlier in this loop -- has the same key.  Entries shorter than
# three characters are ignored on both sides.
#
# The keys are tracked in a set so each l2 entry costs one lookup instead of
# a scan over all of l1; the resulting order of l1 is unchanged.
seen_keys = {entry[1] for entry in l1 if len(entry) >= 3}
for i in l2:
    if len(i) < 3:
        continue
    if i[1] not in seen_keys:
        seen_keys.add(i[1])
        l1.append(i)

print("out", len(l1))
# Entries still carry their line terminators, so they are written verbatim.
# The file uses the default text encoding, matching how the inputs were read.
with open("merged.txt.utf8", 'w') as fo:
    fo.writelines(l1)