OSDN Git Service

fix filepath error.
author gn64_jp <gn64@rec10.org>
Sun, 3 Nov 2013 09:51:24 +0000 (18:51 +0900)
committer gn64_jp <gn64@rec10.org>
Sun, 3 Nov 2013 09:51:24 +0000 (18:51 +0900)
index/pdf2xml.py

index 7ce5b5c..ee2ab40 100644 (file)
@@ -120,14 +120,14 @@ def PDF2TEXT(pdfpath,dstpath,idnum):
     rd.update(title_group.encode('utf-8'))
     rdl=rd.hexdigest()
     for i in xrange(1,pagenum+1):
-        tmpdstpath="\""+cacheFolder+"/page-"+str(i)+dstpath+"\""
-        cmd=u"pdftotext -eol unix -enc UTF-8 -f "+str(i)+u" -l "+str(i)+" \""+pdfpath+u"\" "+tmpdstpath
+        tmpdstpath=cacheFolder+"/page-"+str(i)+dstpath
+        cmd=u"pdftotext -eol unix -enc UTF-8 -f "+str(i)+u" -l "+str(i)+" \""+pdfpath+u"\" \"+tmpdstpath+"\""
         p=subprocess.Popen(cmd.encode("utf-8"),shell=True)
         p.wait()
         time.sleep(1)
         TEXT2solr(s,os.path.splitext(os.path.basename(pdfpath))[0],tmpdstpath,i,pagenum,pdfpath,idnum)
         generateCache(pdfpath.encode("utf-8"),os.path.splitext(os.path.basename(pdfpath))[0].encode("utf-8")+"_"+str(i),i)
-       os.system("rm -f "+tmpdstpath)
+       os.system("rm -f \""+tmpdstpath+"\"")
     sql = u"insert into pdffile (id,path,page,title,title_id,part) values (?,?,?,?,?,?)"
     dbcon.execute(sql,(idnum,pdfpath,pagenum,title_group,rdl,parttext))
     s.commit()