import re
#import unicode
import hashlib
+#import base64
import solr
import yaml
import sqlite3
-confstr="/etc/libre10.conf"
-sqlpath="/tmp/libre10.db"
+confstr="/etc/rec10pdf.conf"
+sqlpath="/tmp/rec10pdf.db"
conf_dic=yaml.load(open(confstr).read())
dbcon=sqlite3.connect(sqlpath,isolation_level=None)
try:
title_g=titletxt.split("_Part")[0]
title_g_id=hashlib.sha224(title_g.encode("utf-8")).hexdigest()
print titletxt+" : "+str(pagenum)+"/"+str(pagemax)
- path_id=hashlib.sha224(os.path.abspath(pdfpath).encode("utf-8")).hexdigest()
+ path_id=idnum
#print data1
solrcon.add(id=idnum+"_"+str(pagenum),title=titletxt,title_group=title_g,
title_group_id=title_g_id,page=int(pagenum),pagemax=int(pagemax),
for st in argvs[1:]:
#print os.path.splitext(st)
if os.path.splitext(st)[1].upper() == ".PDF":
- rd=hashlib.sha224(os.path.splitext(st)[0]).hexdigest()
- rd2=hashlib.sha224(os.path.abspath(st)).hexdigest()
+ #rd=hashlib.sha224(os.path.splitext(st)[0]).hexdigest()
+ #rd2=hashlib.sha224(os.path.abspath(st)).hexdigest()
+ #rd = base64.b64encode(os.path.abspath(st))[]
+ rd = hashlib.md5()
+ rd.update(os.path.abspath(st))
+ rdl=rd.hexdigest()
c = dbcon.cursor()
- c.execute(u"select count(*) from pdffile where id=?",(rd2,))
- if int(c.fetchone()[0])<1:
- PDF2TEXT(st.decode("utf-8"),str(rd)+".txt",rd)
- dbcon.close()
+ c.execute(u"select count(*) from pdffile where id=?",(rdl,))
+ filecount = c.fetchone()[0]
+ print st
+ print rdl
+ print filecount
+ if int(filecount)<1:
+ PDF2TEXT(st.decode("utf-8"),str(rdl)+".txt",rdl)
+dbcon.close()