#!/usr/bin/python
#------------------------------------------------------------------------------
#
#   BiBTeX -> Pajek converter
#
#   Vladimir Batagelj, April 2006
#
#   Runs unchanged on Python 2.6+ and Python 3.
#------------------------------------------------------------------------------

try:
    # Python 2 only: these helpers no longer exist in the Python 3 string
    # module.  The code below uses the equivalent str methods; the import is
    # kept so the module namespace is unchanged when running on Python 2.
    from string import strip,split,replace,lower,index
except ImportError:
    pass
from sys import argv,exit


def _findField(desKeys,desVals,name):
    """Return the value paired with the first key equal to name.

    Only keys that actually have a value are considered (desKeys is one
    entry longer than desVals).  Returns 'UNKNOWN' when the field is absent.
    """
    for key, value in zip(desKeys,desVals):
        if key == name:
            return value
    return 'UNKNOWN'


def processRecord(bibRec,bibType):
    """Parse one BibTeX record and register its work, authors and links.

    bibRec  -- the complete record as a single string, starting at '@'.
    bibType -- index of the record's entry type in the list used by run().

    Works on the module globals prepared by run(): a trace of the record is
    written to net, the title line to nam, and numRec, wTab (works),
    aTab (authors) and links (work -> author numbers) are updated.

    The record is split on '=', so an '=' inside a field value (for example
    in a URL) is treated as a field separator.
    """
    global net, nam, vec, clu, numRec, links, aTab, wTab
    numRec += 1
    net.write('%s : %s > %s\n' % (numRec,bibType,bibRec))

    # Each chunk between two '=' signs looks like  "<value>, <next key>":
    # everything before the last comma is the value of the current field and
    # everything after it is the name of the next field.
    desKeys = ['head']; desVals = []
    for part in bibRec.split('='):
        part = part.strip()
        j = part.rfind(',')
        if j < 0:
            # last field of the record: drop the closing brace instead
            j = len(part)-1
        desVal = part[:j].strip()
        # startswith() is safe on an empty value, unlike desVal[0]
        if desVal.startswith(('"','{')):
            desVal = desVal[1:-1]
        desVals.append(desVal)
        desKeys.append(part[j+1:].strip().lower())

    # the head is '@type{citekey'; keep only the cite key
    headBits = desVals[0].split('{')
    if len(headBits) > 1:
        desVals[0] = headBits[1].strip()
    else:
        desVals[0] = ''

    for key, value in zip(desKeys,desVals):
        net.write(' %s = %s\n' % (key,value))
    net.write(' --- %s\n' % (bibType))

    # double quotes would break the quoted label written to the title file
    title = _findField(desKeys,desVals,'title').replace('"',"''")
    nam.write('%s "%s"\n' % (numRec,title))

    year = _findField(desKeys,desVals,'year')
    net.write(' --- %s\n' % (year))

    work = desVals[0]
    if work in wTab:
        # duplicate cite key: file the record under a generated unique name
        work = 'UNKNOWN-'+str(numRec)
    wTab[work] = [numRec,bibType,year]

    links[work] = []
    for author in _findField(desKeys,desVals,'author').split(' and '):
        author = author.strip()
        net.write(' --- %s\n' % (author))
        if author in aTab:
            aVal = aTab[author]; numAut = aVal[0]
            # keep the first and last year seen (years are compared as text)
            aTab[author] = [numAut,min(aVal[1],year),max(aVal[2],year)]
        else:
            numAut = len(aTab)+1          # authors are numbered from 1
            aTab[author] = [numAut,year,year]
        links[work].append(numAut)


def run(workdir,input):
    """Convert the BibTeX file workdir+input into the bib.* output files.

    workdir -- directory prefix; it is concatenated directly with the file
               names, so it must end with a path separator.
    input   -- name of the BibTeX file inside workdir.

    Creates bib.net, bib.nam, bib.vec and bib.clu in workdir (bib.vec and
    bib.clu are only created, nothing is written to them).  Entries whose
    type is not in the list below are skipped and reported on the console
    together with their line number.

    If the input file cannot be opened the error is printed and exit() is
    called.
    """
    global net, nam, vec, clu, numRec, links, aTab, wTab
    pubs=['article','book','booklet','inbook','incollection',
          'inproceedings','manual','mastersthesis','misc','phdthesis',
          'proceedings','techreport','unpublished']
    try:                                  # open input BiBTeX file
        bib = open(workdir+input,'r')
    except IOError as err:
        print("I/O error(%s): %s" % (err.errno, err.strerror))
        exit()
    opened = [bib]
    try:
        print('bibFile =  %s' % (workdir+input))
        net = open(workdir+'bib.net','w'); opened.append(net)
        print('networkFile =  %s' % (workdir+'bib.net'))
        nam = open(workdir+'bib.nam','w'); opened.append(nam)
        print('titleFile =  %s' % (workdir+'bib.nam'))
        nam.write('*vertices\n')
        vec = open(workdir+'bib.vec','w'); opened.append(vec)
        print('yearFile =  %s' % (workdir+'bib.vec'))
        clu = open(workdir+'bib.clu','w'); opened.append(clu)
        print('typeFile =  %s' % (workdir+'bib.clu'))

        links = {}; wTab = {}; aTab = {}
        numRec = 0; numLine = 0
        recLines = []; bibType = -1
        for line in bib:
            numLine += 1
            line = line.strip()
            if not line:
                continue
            if line.startswith('@'):      # start of record
                if bibType >= 0:
                    processRecord(' '.join(recLines),bibType)
                recLines = []
                head = line.split('{')[0].replace('@','')
                try:
                    bibType = pubs.index(head.strip().lower())
                except ValueError:
                    bibType = -1          # unsupported entry: report, skip
                    print('%s %s' % (numLine, head))
            if bibType >= 0:
                # A line break inside a field is ordinary white space, so
                # the lines of a record are joined with a single space.
                recLines.append(line)
        if bibType >= 0:                  # the last record of the file
            processRecord(' '.join(recLines),bibType)

        for w in wTab:
            t = wTab[w]
            net.write(' %s %s %s "%s"\n' % (t[0],t[1],t[2],w))
        net.write('*authors\n')
        for a in aTab:
            t = aTab[a]
            net.write(' %s %s %s "%s"\n' % (t[0],t[1],t[2],a))
        net.write('*Edgeslist\n')
        for w in links:
            net.write(' %s %s > ' % (w,wTab[w]))
            net.write('%s ' % links[w])
            net.write('\n')
        print('# of records = %s lines = %s\n' % (numRec,numLine))
    finally:
        # close() also flushes; runs even when the conversion fails
        for handle in opened:
            handle.close()

#
# run bib2pajek
#
if __name__ == '__main__':                # run it from command line
    if len(argv) == 3:
        run(argv[1],argv[2])
    else:
        print("Module Bib2Pajek")
        print("Two arguments (WorkDir, BiBTeXfile) required to run !")
        exit()
    print('')
else:                                     # it is imported
    print("Module Bib2Pajek imported.")
    print("To run, type: bib2paj04.run('D:\\vlado\\BibTeX\\','lexicon.bib')")
    print("where 'lexicon.bib' is your input BiBTeX file")
#- End -------------------------------------------------------------------------------