#!/usr/bin/python
#------------------------------------------------------------------------------
#
# BibTeX -> Pajek converter
#
# Seminar project for the Operations Research course of Prof. Batagelj
# Copy(L)eft, November 2001
# Stojan Rancic
#
#------------------------------------------------------------------------------
"""Convert a BibTeX file into a Pajek network of publications and authors."""

try:
    # Python 2 only.  Kept so the names this module has always exposed stay
    # importable there; the code below uses the equivalent str methods.
    from string import strip,split,replace,lower
except ImportError:
    pass
from sys import argv,exit
import re


def blok(v):
    """Process one BibTeX entry: register its key and collect its authors.

    Reads the body of the entry from the module-level file object ``bf``
    (positioned just after the header line) and updates the module-level
    tables:

    * ``WorkTable`` maps the entry key to a new vertex number,
    * ``bigtable`` maps each author name to the list of vertex numbers of
      the entries that name appears in.

    ``VertCnt``, ``WorkTable``, ``bigtable``, ``bf`` and ``AuthString`` must
    already be set up; ``run`` does that.

    Parameter: v -- the stripped header line holding the entry type and,
                    usually, the entry key (e.g. ``@article{key,``).
    Returns:   None.  Entries whose key is already known are skipped, as
               are headers in which no ``{`` can be found.
    """
    global VertCnt, WorkTable, bigtable, bf, AuthString

    header = v.split('{')
    if len(header) == 1:
        # The key is not on the header line; look on the following one.
        v = bf.readline().strip()
        header = v.split('{')
    if len(header) == 1:
        # Malformed header: there is no key to extract.
        return

    article_id = header[1].replace(',', '').strip()
    if article_id in WorkTable:
        # Duplicate key: keep the first occurrence only.
        return
    VertCnt += 1
    WorkTable[article_id] = VertCnt
    vertex = VertCnt

    stripped = ''
    while stripped[-1:] != '}':         # a line ending in '}' closes the entry
        line = bf.readline()
        if line == '':
            # End of file inside an unterminated entry: stop instead of
            # looping forever on empty reads.
            break
        stripped = line.strip()
        words = stripped.split()
        # NOTE: the field is recognised only when the first whitespace
        # separated word is exactly 'author' (any case), e.g. 'author = {..}'.
        if not words or words[0].lower() != 'author':
            continue

        # Drop the field name only (first match); later matches may be part
        # of a person's name.
        authors_raw = AuthString.sub('', line.replace('\n', ''), count=1)

        # The field is taken as complete when the line ends with '},', '",'
        # or '}', or ends with ',' and contains no double quote.
        complete = (stripped[-2:] in ('},', '",')
                    or stripped[-1:] == '}'
                    or (line[-2:-1] == ',' and line.count('"') == 0))
        # NOTE: a quoted field with no trailing comma is not recognised
        # here, so reading continues until the next '",' / '},' line.
        while not complete:
            line = bf.readline()
            if line == '':
                break                   # end of file inside the field
            stripped = line.strip()
            authors_raw = authors_raw + ' ' + stripped
            complete = stripped[-2:] in ('",', '},')

        # Remove the delimiters and the '=' / '#' operators.
        cleaned = authors_raw.replace('",', '').replace('},', '')
        cleaned = cleaned.replace('=', '').replace('#', '').strip()
        if cleaned[:1] in ('"', '{'):
            opener = cleaned[0]
            cleaned = cleaned[1:]
            if opener == '{':
                # A field accepted because it ended in '}' still carries
                # that brace; drop closing braces that have no partner.
                while (cleaned.endswith('}')
                       and cleaned.count('}') > cleaned.count('{')):
                    cleaned = cleaned[:-1]

        for name in cleaned.split(' and '):
            name = name.strip()
            if name:                    # ignore empty names
                bigtable.setdefault(name, []).append(vertex)


def run(workdir, input):
    """Read a BibTeX file and write the network to ``output.net``.

    Every recognised entry becomes a vertex numbered from 1, every distinct
    author a vertex numbered after the entries, and each author is listed in
    the ``*Edgeslist`` section together with the entries it appears in.

    Parameters:
        workdir -- directory prefix; it is concatenated as-is with the file
                   names, so it must end with a path separator.
        input   -- name of the BibTeX file inside ``workdir``.

    Returns: None.  The result is written to ``workdir + 'output.net'``.
    Raises:  SystemExit if the input file cannot be opened (after printing
             the I/O error).

    The parsing state is kept in module-level variables (``VertCnt``,
    ``bigtable``, ``WorkTable``, ``line``, ``pubs``, ``bf``, ``AuthString``)
    because ``blok`` reads and updates them.
    """
    global VertCnt, bigtable, WorkTable, line, pubs, bf, AuthString

    pubs = ['article','book','booklet','inbook','incollection',
            'inproceedings','manual','mastersthesis','misc','phdthesis',
            'proceedings','techreport','unpublished']
    # Start from empty tables so that a key or an author literally named
    # '0' is treated like any other.
    VertCnt, bigtable, WorkTable, AuthTable, line = 0, {}, {}, {}, ' '
    AuthString = re.compile('author', re.I)   # the field name, any case

    bib_path = workdir + input
    try:
        bf = open(bib_path, 'r')
    except IOError as err:
        print("I/O error(%s): %s" % (err.errno, err.strerror))
        exit(1)
    print('%s %s' % ('bibFile = ', bib_path))

    with bf:
        # readline() is used on purpose: blok() consumes the lines of each
        # entry from the same file object.
        while line != '':
            line = bf.readline()
            entry = line.strip()
            if entry[:1] == '@':                          # start of an entry
                kind = entry.split('{')[0].replace('@', '').strip()
                if kind.lower() in pubs:                  # known entry type
                    blok(entry)

    BooksCount = VertCnt
    for name in bigtable:               # authors are numbered after entries
        VertCnt += 1
        AuthTable[name] = VertCnt

    # Invert both tables: vertex number -> label.
    work_labels = [''] * (BooksCount + 1)
    for key, number in WorkTable.items():
        work_labels[number] = key
    auth_labels = [''] * (VertCnt + 1)
    for name, number in AuthTable.items():
        auth_labels[number] = name

    net_path = workdir + 'output.net'
    print('%s %s' % ('pajekFile = ', net_path))
    with open(net_path, 'w') as pf:
        pf.write('*Vertices %s %s\n' % (VertCnt,BooksCount))
        for number in range(1, BooksCount + 1):
            pf.write(' %s \"%s\" ic Green\n' % (number, work_labels[number]))
        for number in range(BooksCount + 1, VertCnt + 1):
            pf.write(' %s \"%s\" ic Red\n' % (number, auth_labels[number]))
        pf.write('*Edgeslist\n')
        for number in range(BooksCount + 1, VertCnt + 1):
            pf.write(' %s ' % number)
            for article in bigtable[auth_labels[number]]:
                pf.write('%s ' % article)
            pf.write('\n')


#
# Entry point
#
if __name__ == '__main__':
    if len(argv) == 3:
        run(argv[1], argv[2])
    else:
        print("Two arguments (WorkDir, BiBTeXfile) required to run !")
        exit(1)
    print('')
else:
    print("Module Bib2Pajek imported.")
    print("To run, type: bib2pajek.run('D:\\vlado\\BibTeX\\','lexicon.bib')")
    print("Where 'lexicon.bib' is your input BiBTeX file")

#- End -------------------------------------------------------------------------------