#!/usr/bin/python
#------------------------------------------------------------------------------
#
#  BiBTeX -> Pajek converter
#
#  Vladimir Batagelj, April 2006
#------------------------------------------------------------------------------
"""Convert a BibTeX file into Pajek network / partition / vector files.

``run(workdir, input)`` reads ``workdir + input`` and writes, into the same
directory:

* ``bib.net``    - two-mode network: works, then authors, then work-author edges
* ``bib.nam``    - titles of the works
* ``bib.vec``    - publication year of every work
* ``bib.clu``    - entry type of every vertex (0 = author, 1.. = BibTeX type)
* ``bib.aut``    - authors ordered by the last word of their name
* ``bibeqv.clu`` - identity partition over all vertices

The code is written so that it runs unchanged on Python 2.6+ and Python 3
(single-argument ``print(...)`` calls, ``in`` instead of ``has_key`` ...).
"""

try:
    # Python 2 only.  The converter itself uses str methods now; the names are
    # still imported so the module namespace stays what it was on Python 2.
    from string import strip, split, replace, lower, index
except ImportError:
    pass
from sys import argv, exit
import re
import time

# BibTeX entry types that are converted; the position in this tuple is the
# numeric ``bibType`` of a record (bib.clu stores position + 1).
_PUB_TYPES = ('article', 'book', 'booklet', 'inbook', 'incollection',
              'inproceedings', 'manual', 'mastersthesis', 'misc', 'phdthesis',
              'proceedings', 'techreport', 'unpublished')

_NON_DIGIT = re.compile(r'\D')


def _parseFields(bibRec):
    """Split a flattened record on '=' into parallel key and value lists.

    ``keys[i]`` names ``vals[i]``; ``keys[0]`` is the pseudo field ``'head'``
    whose value is the text in front of the first field name.  ``keys`` has
    one trailing entry more than ``vals`` (whatever follows the last value).
    """
    keys = ['head']
    vals = []
    for part in bibRec.split('='):
        part = part.strip()
        cut = part.rfind(',')
        if cut < 0:
            # No comma: this is the last chunk, drop its final character
            # (normally the brace closing the record).  max() keeps an empty
            # chunk - e.g. produced by "==" inside a value - from going
            # negative.
            cut = max(len(part) - 1, 0)
        val = part[:cut].strip()
        # val[:1] instead of val[0]: an empty value must not raise IndexError.
        if val[:1] in ('"', '{'):
            val = val[1:-1]
        vals.append(val)
        keys.append(part[cut + 1:].strip().lower())
    return keys, vals


def _fieldValue(keys, vals, name):
    """Return the value of field ``name`` or 'UNKNOWN' when it is absent."""
    try:
        return vals[keys.index(name)]
    except (ValueError, IndexError):
        # IndexError: the name matched the trailing key that has no value.
        return 'UNKNOWN'


def processRecord(bibRec, bibType):
    """Register one BibTeX record in the module-level tables.

    bibRec  -- the whole record joined into a single line
    bibType -- index of the record's entry type in the list of known types

    Uses the globals prepared by ``run``: increments ``numRec``, writes the
    title to ``nam``, stores ``[number, type, year, key]`` in ``wTab``,
    creates or updates ``[number, firstYear, lastYear, name]`` entries in
    ``aTab`` and stores the author numbers of the work in ``links``.
    Raises IndexError if the record head contains no '{'.
    """
    global nam, numRec, links, aTab, wTab
    numRec += 1
    keys, vals = _parseFields(bibRec)
    # citation key = text between the first and the second '{' of the head
    work = vals[0].split('{')[1].strip()

    # double quotes would terminate the quoted label in the output file
    title = _fieldValue(keys, vals, 'title').replace('"', "''")
    nam.write('%s "%s"\n' % (numRec, title))

    year = _fieldValue(keys, vals, 'year')
    m = _NON_DIGIT.search(year)
    if m:
        year = year[:m.start()]       # keep the leading digits only
    try:
        year = int(year)
    except ValueError:
        print('bad year in record %s : %s' % (numRec, work))
        year = 0

    if work in wTab:                  # repeated citation key
        work = 'DUP-' + str(numRec)
    wTab[work] = [numRec, bibType, year, work]

    links[work] = []
    for author in _fieldValue(keys, vals, 'author').split(' and '):
        author = author.strip()
        if author in aTab:
            aVal = aTab[author]
            numAut = aVal[0]
            aTab[author] = [numAut, min(aVal[1], year), max(aVal[2], year),
                            author]
        else:
            # aTab holds a placeholder entry while records are being read,
            # so the first real author gets number 1
            numAut = len(aTab)
            aTab[author] = [numAut, year, year, author]
        links[work].append(numAut)


def _openOutput(opened, workdir, name, label=None):
    """Open ``workdir + name`` for writing and remember it in ``opened``."""
    path = workdir + name
    handle = open(path, 'w')
    opened.append(handle)
    if label:
        print('%s =  %s' % (label, path))
    return handle


def run(workdir, input):
    """Convert the BibTeX file ``workdir + input`` into Pajek files.

    workdir -- directory prefix, concatenated as is (include the trailing
               path separator)
    input   -- name of the BibTeX file inside ``workdir``

    Prints progress to stdout.  If the input file cannot be opened an error
    message is printed and ``sys.exit()`` is called.
    """
    global nam, numRec, links, aTab, wTab
    bibPath = workdir + input
    try:
        # open input BiBTeX file
        bib = open(bibPath, 'r')
    except IOError as err:
        print("I/O error(%s): %s" % (err.errno, err.strerror))
        exit()
    print('bibFile =  %s' % bibPath)

    opened = [bib]
    try:
        net = _openOutput(opened, workdir, 'bib.net', 'networkFile')
        nam = _openOutput(opened, workdir, 'bib.nam', 'titleFile')
        nam.write('*vertices\n')
        vec = _openOutput(opened, workdir, 'bib.vec', 'yearFile')
        clu = _openOutput(opened, workdir, 'bib.clu', 'typeFile')
        aut = _openOutput(opened, workdir, 'bib.aut')
        eqv = _openOutput(opened, workdir, 'bibeqv.clu')

        # the '0' placeholders make numbering via len(table) start at 1
        links = {}
        wTab = {'0': ['0']}
        aTab = {'0': ['0']}
        numRec = 0
        numLine = 0
        bibType = -1
        recLines = []
        for line in bib:
            numLine += 1
            line = line.strip()
            if not line:
                continue
            if line[0] == '@':        # start of record
                if bibType >= 0:
                    processRecord(' '.join(recLines), bibType)
                recLines = []
                head = line.split('{')[0].replace('@', '').strip()
                try:
                    bibType = _PUB_TYPES.index(head.lower())
                except ValueError:
                    # lines of an unknown entry are skipped until the next '@'
                    bibType = -1
                    print('Unknown head, line= %s %s' % (numLine, head))
            if bibType >= 0:
                recLines.append(line)
        if bibType >= 0:              # the record still pending at EOF
            processRecord(' '.join(recLines), bibType)

        del wTab['0']
        del aTab['0']
        numAut = len(aTab)
        numVer = numRec + numAut
        print('# of lines=%s, records=%s, authors=%s\n'
              % (numLine, numRec, numAut))

        net.write('% Bib2Pajek converter: BibTex -> Pajek\n')
        net.write('% by Vladimir Batagelj, April 2006\n')
        net.write('% BibTeX File = ' + bibPath + '\n% ')
        net.write('%s\n' % time.ctime(time.time()))
        net.write('*vertices %s %s\n' % (numVer, numRec))
        vec.write('*vertices %s \n' % numRec)
        clu.write('% 0 - author\n')
        for i, pub in enumerate(_PUB_TYPES):
            clu.write('% ')
            clu.write('%2i - %s\n' % (i + 1, pub))
        clu.write('*vertices %s \n' % numVer)

        # works first, in record order ...
        works = sorted(wTab.values(), key=lambda t: t[0])
        for t in works:
            net.write('%6i "%s" [%i]\n' % (t[0], t[3], t[2]))
            clu.write('%s\n' % (1 + t[1]))
            vec.write('%s\n' % t[2])
        # ... then authors, numbered after the works
        authors = sorted(aTab.values(), key=lambda t: t[0])
        for t in authors:
            net.write('%6i "%s" [%i-%i]\n'
                      % (numRec + t[0], t[3], t[1], t[2]))
            clu.write('%s\n' % 0)

        net.write('*Edges\n')
        for t in works:               # record order keeps output reproducible
            for a in links[t[3]]:
                net.write('%5i %5i 1 [%4i]\n' % (t[0], numRec + a, t[2]))

        # Order authors by the last word of the name.  The "+ 1" makes a name
        # without any space sort by the whole name (rfind gives -1 there).
        authors.sort(key=lambda t: t[3][t[3].rfind(' ') + 1:].lower())
        for t in authors:
            aut.write('%6i "%s"\n' % (numRec + t[0], t[3]))

        eqv.write('*vertices %s \n' % numVer)
        for i in range(numVer):
            eqv.write('%s\n' % (i + 1))
    finally:
        # close() also flushes; runs even when the conversion fails
        for handle in opened:
            handle.close()
    print('Bib2Pajek - done')


#
# run bib2pajek
#
if __name__ == '__main__':
    # run it from command line
    if len(argv) == 3:
        run(argv[1], argv[2])
    else:
        print("Module Bib2Pajek")
        print("Two arguments (WorkDir, BiBTeXfile) required to run !")
        exit()
    print('')
else:
    # it is imported
    print("Module Bib2Pajek imported.")
    print("To run, type: bib2pajek.run('D:\\vlado\\BibTeX\\','lexicon.bib')")
    print("where 'lexicon.bib' is your input BiBTeX file")
#- End -------------------------------------------------------------------------------