#!/usr/bin/python
#------------------------------------------------------------------------------
#
#   BibTeX -> Pajek converter
#
#   Term project for the Operations Research course taught by Prof. Batagelj
#   Copy(L)eft , November 2001
#   Stojan Rancic <stojan.rancic@campus.fri.uni-lj.si>
#
#------------------------------------------------------------------------------

from string import strip,split,replace,lower            # Inicializacija funkcij.
from sys import argv,exit                               # Nastejemo, katere zunanje
import re                                               # funkcije bomo uporabljali

#
# Procedura blok
#
# Glavna funkcija, procesira BiBTeX bloke in iz njih izlusci naziv literature
# in avtorje
# Parameter: vrstica, v kateri je tip literature in njen naziv
# Rezultat: ni
#

def blok(v):
   global VertCnt,WorkTable, bigtable, bf,AuthString       # Definicija globalnih
                                                           # spremenljivk
   temp=split(v,'{')                # Naziv publikacije je v prebrani vrstici
   if len(temp)==1:                 # ali pa v naslednji
           v=strip(bf.readline())
           temp=split(v,'{')
   ArticleId=strip(replace(temp[1],',',''))        # Poiscemo naziv literature ..
   if WorkTable.has_key(ArticleId): # Ce najdemo obstojec kljuc,
           return                   # je to duplikat, preskocimo
   VertCnt+=1
   WorkTable[ArticleId]=VertCnt     # ..in ga dodamo v asociativno tabelo tock
   line2=''
   while (line2[-1:] != '}'):       # Beremo do konca bloka
      line = bf.readline()
      line2=strip(line)
      sline2=split(line2)
      if line2 != '':
         if lower(sline2[0]) == 'author':           # poiscemo vrstico, ki se zacne na 'author'
            authorz = replace(line,'\n','')
            authorz = AuthString.sub('',authorz)    # in samo besedo 'author'
            if ( line2[-2:] == '},' or line2[-2:] == '",' or line2[-1:]=='}' or (line[-2:-1]==',' and line.count('"')==0)): # Ce se vrstica z avtorji konca z }, ali ",
               pass                                 # so to vsi avtorji, sicer beremo se naslednje vrstice
#              print "Imamo celega avtorja"
            else:
#              print "Avtor se lomi !"
               EndOfAuthor = 0
               while EndOfAuthor == 0:
                  line = bf.readline()
                  line2=strip(line)
                  authorz = authorz + ' ' + replace(line2,'\n','')
                  if (line2[-2:] == '",' or line2[-2:] == '},'):
                     EndOfAuthor = 1
#           print "Avtorji so: %s" % authorz
            auth = replace(replace(authorz,'",',''),'},','')        # Pobrisemo nezeljene znake iz
            auth = strip(replace(replace(auth,'=',''),'#',''))      # niza avtorjev
#           if (re.match('^"|^{',auth)):
            if (auth[0]=='"' or auth[0]=='{'):
                    auth = auth[1:]                         # pobrisemo zacetni " ali {
#           print auth
            authtab=split(auth,' and ')             # Izluscimo posamezne avtorje
                                                    # in jih dodamo v asociativno
            for a in range(len(authtab)):           # tabelo skupaj z literaturo
               if bigtable.has_key(strip(authtab[a])):
                  bigtable[strip(authtab[a])].append(WorkTable[ArticleId])
               else:
                  bigtable[strip(authtab[a])]=[WorkTable[ArticleId]]

#
# Glavni del programa
#
def run(workdir,input):
   global VertCnt,bigtable,WorkTable,line,pubs,bf,AuthString
   pubs=['article','book','booklet','inbook','incollection',
         'inproceedings','manual','mastersthesis','misc','phdthesis',
         'proceedings','techreport','unpublished']
   VertCnt,bigtable,WorkTable,AuthTable,line=0,{'0':['0']},{'0':['0']},{'0':['0']},' '     # Nastavitev zacetnih spremenljivk
   AuthString = re.compile('author', re.I) # Objekt za iskanje besede 'author', case insensitive
   try:
      bf = open(workdir+input,'r')       # Preberemo ime BiBTeX datoteke iz ukazne vrstice
   except IOError , (errno, strerror):
      print "I/O error(%s): %s" % (errno, strerror)
      exit()
   print 'bibFile = ', workdir+input
   while line != '':             # Beremo vrstico za vrstico..
      line = bf.readline()
      vrsta = strip(line)
      if vrsta != '':
         if vrsta[0] == '@':     # Ce najdemo zacetek bloka
            sv=split(vrsta,'{')
            pub=replace(sv[0],'@','')
            if pubs.count(lower(pub))>0:    # in ce je to pravi blok
               blok(vrsta)       # ga sprocesiramo
   bf.close
   del bigtable['0']             # Pobrisemo odvecne elemente
   del WorkTable['0']
   del AuthTable['0']
   BooksCount=VertCnt
   for a in bigtable.keys():     # Napolnimo avtorsko hash tabelo
      VertCnt+=1
      AuthTable[a]=VertCnt
   print 'pajekFile = ', workdir+'output.net'
   pf = open(workdir+'output.net','w')   # Kreiramo Pajkovo datoteko
   pf.write('*Vertices %s %s\n' % (VertCnt,BooksCount))
   work_table=[]
   auth_table=[]
   for a in range(BooksCount+1):
      work_table.append('')
   for a in range(VertCnt+1):
      auth_table.append('')
   for b in WorkTable.keys():
      work_table[WorkTable[b]]=b
   for b in AuthTable.keys():
      auth_table[AuthTable[b]]=b
   for a in range(1,BooksCount+1):
      pf.write('  %s \"%s\" ic Green\n' % (a,work_table[a]))
   for a in range(BooksCount+1,VertCnt+1):
      pf.write('  %s \"%s\" ic Red\n' % (a,auth_table[a]))
   pf.write('*Edgeslist\n')
   for a in range(BooksCount+1,VertCnt+1):
      pf.write('  %s ' % a)
      for c in range(len(bigtable[auth_table[a]])):
         pf.write('%s ' % bigtable[auth_table[a]][c])
      pf.write('\n')
   pf.close

#
# Inicializacija
#
if __name__ == '__main__':
   if len(sys.argv) == 3:
      run(argv[1])
   else:
      print "Two arguments (WorkDir, BiBTeXfile) required to run !"
      exit()
   print
else:
   print "Module Bib2Pajek imported."
   print "To run, type: bib2pajek.run('D:\\vlado\\BibTeX\\','lexicon.bib')"
   print "Where 'lexicon.bib' is your input BiBTeX file"

#- End -------------------------------------------------------------------------------