rd.update(title_group.encode('utf-8'))
rdl=rd.hexdigest()
for i in xrange(1,pagenum+1):
- tmpdstpath="\""+cacheFolder+"/page-"+str(i)+dstpath+"\""
- cmd=u"pdftotext -eol unix -enc UTF-8 -f "+str(i)+u" -l "+str(i)+" \""+pdfpath+u"\" "+tmpdstpath
+ tmpdstpath=cacheFolder+"/page-"+str(i)+dstpath  # bare per-page output path; shell quotes are added only where a command string is built
+ cmd=u"pdftotext -eol unix -enc UTF-8 -f "+str(i)+u" -l "+str(i)+" \""+pdfpath+u"\" \""+tmpdstpath+"\""  # extract page i only; source and destination paths are both double-quoted for the shell
p=subprocess.Popen(cmd.encode("utf-8"),shell=True)
p.wait()
time.sleep(1)
TEXT2solr(s,os.path.splitext(os.path.basename(pdfpath))[0],tmpdstpath,i,pagenum,pdfpath,idnum)
generateCache(pdfpath.encode("utf-8"),os.path.splitext(os.path.basename(pdfpath))[0].encode("utf-8")+"_"+str(i),i)
- os.system("rm -f "+tmpdstpath)
+ os.system("rm -f \""+tmpdstpath+"\"")  # quote here as well, since tmpdstpath no longer carries its own quotes
sql = u"insert into pdffile (id,path,page,title,title_id,part) values (?,?,?,?,?,?)"
dbcon.execute(sql,(idnum,pdfpath,pagenum,title_group,rdl,parttext))
s.commit()