U
    fa·                     @   sb  d dl Zd dlZd dlZd dlZd dlZd dlZd dl	m
Z
mZ d dlma d dlmZ d dlmZ d dlmZ d dlmZ dd	 Zd
d Zdd Zdd Zdd ZdAddZdd ZdBddZdd Zdd Zdd Zdd  Z d!d" Z!d#d$ Z"dCd&d'Z#d(d) Z$d*d+ Z%dDd,d-Z&d.d/ Z'd0d1 Z(dEd3d4Z)d5d6 Z*d7d8 Z+d9d: Z,d;d< Z-d=d> Z.d?d@ Z/dS )F    N)Markdowndisplay)	scholarly)AbstractRetrieval)	FreeProxy)Works)SequenceMatcherc              	   C   s   t | d}| }W 5 Q R X |d}|D ]D}t|}|i kr,z|j|dd}W q,   tj|dgd}Y q,X q,|tj	}||d 
  jj}|j|dd |S )	NrenditemTignore_indexr   indexZspeakerinplaceopenreadsplitentry_to_dictappendpd	DataFramefillnanpNaNisnar   valuesdrop	file_namefilecontentZall_entriesentrydictiodfind r'   0/var/www/nanotud/old_database/modules_webpage.pyread_seminars   s    
r)   c           
   	   C   s  t | d}| }W 5 Q R X |ddd }|D ]t}|d}i }|D ]"}|dd}z|d d	kr`|d
kr`d	|dd kr`d|dd kr|dd  |d< nd|kr|jdddd  |d< nd|kr|jdddd  |d< nd|jddd}t|dkr&|jddd}t|dkrHtj||d  < n|d  ||d  < W qL   Y qLX qL|i kr4z|	j|dd}	W q4   t	j
|dgd}	Y q4X q4|	tj}	|	S )u  
    Reads publication from database given by the format of the webpage.
    
    Parameters
    ----------
    file_name : name of the database file
        Structure: Two columns (1 = property, 2 = value )
                   Entries are separated by 'enditem
##################################'
            Example:
                '##########################################################################
                AG GC
                cvselected	no
                firstlast yes
                #filename
                cover ../pubs/reprints/covers/2020_Krok.jpg
                year 2020
                title		Modification of titanium implants using biofunctional nanodiamonds for enhanced antimicrobial properties
                authors Emilia Krok, Sascha Balakin, Jonas Jung, Frank Gross, J&ouml;rg Opitz, <b>G. Cuniberti</b>
                pubdate 2020.03.02
                type		regular
                reference	<i>Nanotechnology</i> <b>31</b>, 205603 (2020)
                DOI		10.1088/1361-6528/ab6d9b/
                abstract	The present study describes a novel antimicrobial surface using anodic oxidation of titanium and biofunctional
                detonation nanodiamonds (ND). ND have been loaded with antibiotics (amoxicillin or ampicillin) using poly
                (diallyldimethylammonium chloride)(PDDA). Successful conjugation with PDDA was determined by dynamic light scattering, …
                enditem
                ###########################################################################
                ...'

    Returns
    -------
    DataFrame
        Containing all publication entries from 'file_name'. Each row contains one publication and
        columns its values. If one property is not set in file_name, its value is set to 'np.NaN'.
    r	   r
      
    	# r      abstract   journalfullname maxsplitarxivpasswdTr   r   )r   r   r   replacestriplenr   r   r   r   r   r   )
r    r!   r"   Zall_publicationsarticle
attributesr$   attr   r%   r'   r'   r(   read_publications$   s>    $


*r@   c                 C   s   |  d}i }|D ]}|dd}z|d dkr|dkrd|dd kr|j dd	d
}t|d	kr|j dd	d
}t|d	krtj||d  < q|d  ||d  < n|d  ||d  < W q   Y qX q|S )Nr,   r-   r.   r/   r0   r1   r      r*   r7   r6   )r   r:   r<   r   r   r;   )r#   r>   r$   r?   r   r'   r'   r(   r   q   s     
$r   c              	   C   s   t | d}| }W 5 Q R X |d}|D ]D}t|}|i kr,z|j|dd}W q,   tj|dgd}Y q,X q,|tj	}||d 
 |d 
 @  jj}|j|dd	 |S )
Nr	   r
   Tr   r   r   ZvornameZnachnamer   r   r   r'   r'   r(   read_addresses   s    
 rB   c           	   	   C   s   d}t | d}||jdddd  | D ]t\}}||   }| D ]J\}}t|dkr~||d t| d  qN||d t| d  qN|| q0W 5 Q R X d S )	N:enditem
#################################################
wr,   r*   r7      r.   		)r   writer   iterrowsr   itemsr<   str)	r    r%   	seperatorr!   r   rowto_writeZindex_wvaluer'   r'   r(   write_addresses   s    rO   c                 C   s   d || }tt| d S )Nz<span style=color:{}>{}</span>)formatr   r   )stringcolorZcolorstrr'   r'   r(   printmd   s    rS   c           	      C   s  d}d}d}g }| j D ]}| j| j}t|s@t|d}t|sR|dkrt| j| j}|| j|df< t|st	dt| d | j| j |d7 }t|r|
tj | j| jdkr| j| jd	krt	d
| j| j d| j| j d qt|}|d kr$|
| |d7 }qt| j| j}t|sJ||krX|
tj qt	d| d|  |d7 }|| j|df< t|}|d kr|
| |d7 }q|
tj qtdt| dt| ddd t	d| || d< | S )Nr   r6   r1   DOIzFound the DOI(z) for:r*   ZpatentZthesiszNo authorlist found for  ()zFound the correct DOI:z for the old one:z**z new DOIs has been found and z& incorrect DOIs have been corrected**.redrR   z4We also have created {} formated author list entriesauthors_new)r   locrT   r   r   rJ   r;   find_doi_for_titletitleprintr   r   r   typeget_authors_doirS   rP   )	r%   Z
N_new_doisZN_corrected_doisZN_authorlistrY   r&   rT   Zauthor_Znew_doir'   r'   r(   correct_authors_and_doi   sJ    


 (





"r`   	Cunibertic                 C   sn   z8t d|  ddd}t|jdkr6|jd j}|W S W n   Y nX t| |dd}|dkrd|d	 S tjS dS )
z.Looks for the DOI in Scopus and Crossref.
    ztitle(rV   T)verboser*   r   N)author_searchjournalrT   )ZScopusSearchr<   resultsdoifind_crossref_for_titler   r   )r\   Zauthor_presetZdocument_searchZ
doi_scopusitemr'   r'   r(   r[      s    
r[   c                 C   sh   d}t  }|d kr"|j| |d}n|j| ||d}|D ],}td |d d |  }|dkr6|  S q6d S )Ng        )bibliographicauthor)ri   rj   Zcontainer_titler\   r   g?)r   queryr   ratio)r\   rc   rd   simworksZw1rh   Zsim_newr'   r'   r(   rg      s    
rg   c              	   C   s<   z
t | W S    zt| W  Y S    Y Y d S X Y nX d S )N)format_authors_scopusformat_authors_crossref)rT   r'   r'   r(   r_     s    
r_   c           	      C   s  t  }|| }d}dd |d D }t|D ]@\}}|t|d d krT|d7 }|d  dkrl|d	7 }|d
  D ]}|d dkr|d dkrt|ddkr|dD ]}|t|d  d 7 }q|dd d }n|t|d  d 7 }qx|dt|d   d 7 }qx|d  dkr4|d }q.|d  r\|t|d  d 7 }q.|t|d d 7 }q.|dd }|S )z
    For a given DOI the authors will be searched in Crossref and will be
    formated in the following way:
    'E. Krok, S. Balakin, J. Jung, F. Gross, J. Opitz, and <b>G. Cuniberti</b>''
    r1   c                 S   s   g | ]}d |kr|qS )familyr'   ).0xr'   r'   r(   
<listcomp>  s      z+format_authors_crossref.<locals>.<listcomp>rj   r*   and rq   ra   <b>givenr   -   –.-Nr/   r6   . Cuniberti</b>, , r+   )	r   rf   	enumerater<   
capitalizer   html_codingupperisupper)	rT   rn   rh   rQ   author_listipersonfirstspr'   r'   r(   rp     s0    

rp   c                 C   st  t | }d}t|jD ]J\}}|t|jd kr:|d7 }|j dkrP|d7 }|jdkr|jdkr|j D ]}|d d	kr|d d
krt|d	dkr|d	D ]}|t|d 	 d 7 }q|dd d }n|t|d 	 d 7 }qr|d	t|d 	  d 7 }qr|j dkr,|d }q|j
 rP|t|j d 7 }q|t|jd 7 }q|dd }|S )z
    For a given DOI the authors will be searched in Scopus and formated in the following way:
    'E. Krok, S. Balakin, J. Jung, F. Gross, J. Opitz, and <b>G. Cuniberti</b>''
    r1   r*   ru   ra   rv   NNoner   rx   ry   rz   r/   r6   r{   r|   r}   r+   )r   r~   authorsr<   surnamer   Z
given_namer   r   r   r   )rT   r=   rQ   r   r   r   r   r'   r'   r(   ro   3  s.    
ro   c                 C   s   t t| } | S )z&Converts string to HTML entities.
    )nenamed_entitieshtmlescape)rQ   r'   r'   r(   r   U  s    r   c              O   C   s  d}dddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddN}t |d}| jD ]}| D ]\}}|| jkrt| j| | s| j| | dkst| j| |  dkrnj|d	krdt| j| d
 sd|	|| t| j| d
  d  n$|	|| t| j| |  d  qq|	| qW 5 Q R X t
d| dS )z
    Writes the Dataframe df in the structure needed by the webpage to be imported.
    The database is saved in the file 'filename'.
    rC   rF   r.   r6   )NAG
cvselectedr^   	firstlastcoveryearr\   r   pubdate	referencerT   r3   filenamenonmieiECEMPCover	publisher
submisdateISBNbbllxbbllybburxbburyfieldhyperrefr5   
journalrefarxiv	arxivmeshendpagerefreport_no	report_no	startpagevolumejournalacronymWCUISSNissuepageIF
acceptdate
revisedate
webpubdateESFZIHpudate	booktitleeditorsorder_itpubtownreference_morepISSNaddress	WCUTOPTEN
stampamelovjntref
openaccessAG2journalcover	arxivdate	commentcvaddmaterial
receivdateIFyearPRaccessioncodePRstatusURLIFrefplacemisurelibrarycongressseriescoverdetailsreferenceshortIEEEcnINSPECaneconophysicsother_order_itsenzaISBNlinkrD   r   r1   r   rY   r,   zDatabase written to file N)r   r   rI   columnsr   r   rZ   rJ   r;   rG   r]   )r%   r   rK   sepr!   r&   keyspacingr'   r'   r(   write_corrected_db\  s                                                            

F &&
r   Fc                 C   s   |dkrt   td td t| }t|}|jdgd}t }|jD ]}|j	|j
ddd}qL|j|d< |jd	ddd
 ||d    }|d d|d< |S )z
    Receives all publications from Google Scholar for a given searchfield
    
    Returns
    -------
        DataFrame
            DataFrame with all publications on Google Scholar with short info (title, year, bib)
    TzOIf nothing happen in the next minute your IP might already be blocked by GooglezTry: proxy, VPN, TOR, ...publications)sectionsFr   sort
google_pubr   )	ascendingr   r\   rJ   )use_proxy_scholarlyr]   r   Zsearch_authornextfillr   r   r   r   bibsort_valuesisnullastype)Zsearchfield	use_proxyZsearch_queryrj   Z
pub_googlepubr'   r'   r(   get_pub_google  s    



r   c                  C   sD   t d tdddddgd } tj| | d td	t|   d S )
NzLet's try to use a proxy...Tr*   ZDEZUSCA)randtimeoutZ
country_id)httphttpsz(**proxy is used to access Google with:**)r]   r   getr   r   rS   rJ   )proxyr'   r'   r(   r     s
    r   c                 C   s|   t ddd t }| jD ]>}t| d j|  |j| d |  jddd}t	
d	 q|d
 d|d
< t ddd |S )NzCFill all informations available at **Google** for the publications:bluerX   r\   r   TFr      r   intz**Finished**)rS   r   r   r   r]   rZ   r   r   r   timesleepr   )ZDF
pub_filledr   r'   r'   r(   google_details_publication  s    
 r   c                 C   s  g g g g g g g g f\}}}}}}}}	| d   }
| d   }| d   }td |  D ]\}}t|d  t|d ||d }|dkrz>t|d d	d
}z||j W n   |tj Y nX W n,   t	|d d  d}|tj Y nX ||d  ||d d  || |t
| |	t| z0|jdkrZ||j n||d d  W n<   z||d d  W n   |tj Y nX Y nX || dkrHzB|jdkrt|j| j|df< n|d dd | j|df< W nJ   z |d dd | j|df< W n   tj| j|df< Y nX Y nX |
| dkrzt|d | j|df< W n`   z6|jdkrtj| j|df< nt|j| j|df< W n   tj| j|df< Y nX Y nX || dkrXz|d | j|df< W n`   z6|jdkrtj| j|df< nt|j| j|df< W n   tj| j|df< Y nX Y nX d|d dd ksd|d dd krt||}|dkr||d  n(t|dkr|| n||d  n||d  qX|d ||d  |d ||d  |d ||d  |tj |	tj qXt|| d< t|| d< t|| d< t|| d< t|| d< t|| d< t|| d< | d t| d< t|	| d< t| } | S )z
    Function that gets DOI and title from Crossref and/or Scopus and writes it into the dataframe.
    If volume or issue is not given by Google it will search for it at Crossref and Scopus.
    r   numberpagesz%searching in crossref and scopus for:r\   rd   NrT   ZFULL)viewz8**not found in scopus**. API key and permission correct?r   zshort-container-titleTr   rx   r   u   …r3      z	Full-Text   	new_titlerh   pub_datejournal_short
cover_date
first_last)r   r]   rH   rg   r   r   Z	coverDater   r   rS   pub_date_crossfirst_last_authorZsourcetitle_abbreviationZstartingPagerJ   rZ   r   r   ZissueIdentifierget_abstractr<   arrayr   clean_abstracts)r   rj   Zdoi_listZ
title_listZ	item_listr   r  Zabstract_listr  r  Zvolume_conditionZnumber_conditionZpages_conditionelementrL   item_CRitem_scopusr3   r'   r'   r(   get_details_crossref_scopus  s    
   

  ,




r  c                 C   s   d}z| d  dd}| dd}| dd}| dd}t|dk r|dkrT|W S |jdkrv|jdkrn|W S |j}n|j}|W S |W S    |dkr| Y S |jdkr|jdkr| Y S |j}n|j}Y nX |S )	z
    Receives abstracts for publication from Crossref and Scopus, given the item found by Scopus and Crossref.
    
    Returns
    -------
        String
            abstract
    Nr3   z	<jats:p> r1   z<jats:p>z
 </jats:p>z	</jats:p>r   )r:   r<   descriptionr3   )r
  r  r3   r'   r'   r(   r  (  s2    	



r  c                 C   s"  | j D ]}d}| j| j}|d dkshd|dd kshd|d d kshd|dd kshd	|d d
 kr|ddkr|||dd d   d }nD|ddkr|||dd d   }n|||dd d   }n2d|dd  kr||d |d  }n|| }|| j|df< q| S )Nr1   r      ©Z	Copyright   u   c○   Z	COPYRIGHTzThis journal is   ZWeinheimr/      r,   ZSwitzerland   .r*   ir3   )r   rZ   r3   find)r%   numrQ   txtr'   r'   r(   r  Q  s    L r  add.database.papersc              P   C   s&  g }| j tjddd} d}ddddd	d
dddddddddg}ddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddddO}t|d0}|  D ]\}}z2t|d s*|d dkr*|d }	nt W n6   zt|d }	W n   t	|d }	Y nX Y nX t
|}
t|d }t|d }d|d d! kr||d"  || d#d$d%|d& d'd(t|d) ||	|d* |
|d+ |d" |d'g}t||D ]\}}||| d,  qqW 5 Q R X |S )-a=  
    Write publications from Google to filenname with the structure needed by the webpage of the chair.
    It aslo returns the urls of the articles with incomplete abstracts.
    'XXX' is written in the database where no data can be found. By searching for 'XXX' later we can complete
    those entries by hand.
    XXXT)regexrC   zAG		zcvselected	ztype		z
firstlast	z
#filename	z#cover		zyear		ztitle		z	authors		z	pubdate		z
reference	zDOI		z	hyperref	z	abstract	r
   rF   r.   r6   z	 )Or   r   r^   r   r   r   r\   r   r   r   rT   r3   r   r   r   r   r   r   r   r   r   r   r   r   r   r5   r   r   r   r9   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r   rD   rY   rh   rj   r   r3   z&hellip;iNurlZGCnoZtypeXXXr  r1   z../pubs/reprints/covers/XXXr   r   rT   r,   )r:   r   nanr   rH   r   r   
ValueErrorrp   format_authors_googleformat_referencer   r   rG   rJ   zip)r   r   Zurl_abstract_incompleterK   Zname_rowr   r!   r   rL   Zauthor_shortr   r   r3   r"   namerN   r'   r'   r(   add_pubs_to_filee  s    
                                                                 


 
      "r#  c              	   C   s   d}|dt | d  d 7 }z |dtt| d  d 7 }W n$   |dt| d  d 7 }Y nX z0|tt| d d	 tt| d
  d 7 }W n4   |t| d d	 tt| d
  d 7 }Y nX |S )Nr1   z<i>r  z</i>z <b>r   z</b> r   rU   r   rV   r   )r   rJ   r   )rL   rQ   r'   r'   r(   r     s     0.r   c                 C   s4   | d d d dks(| d d d dkr,dS dS dS )	z8 Checks if Cuniberti is either first or last author
    rj   r   rq   ra   r/   yesr  Nr'   )rh   r'   r'   r(   r    s    (r  c                 C   s4   t | D ]&\}}|dkr$t| qt| qdS )Nr   T)r~   
webbrowserr   Zopen_new_tab)Zurlsr   r  r'   r'   r(   	open_urls  s
    r&  c              	   C   s   z| d d d \}}}W n>   z| d d d \}}}W n   t j Y  Y S X Y nX tt|dkrvdt| }tt|dkrdt| }d|||S )Nzpublished-onlinez
date-partsr   Zissuedr*   0z{0}.{1}.{2})r   r   r<   rJ   rP   )rh   r   monthdayr'   r'   r(   r    s    r  c           	      C   s   d}| }| d}t|D ]\}}| dd }|t|d krH|d }|dkrX|d }| dd d }d}|D ]J}|d d	kr|d d
kr|t|d d 7 }qr|d	t|d  d 7 }qr|| t| }|dkr|d }|d }q|d d }|S )Nr1   z and r6   r/   r*   ra   rv   r   rx   ry   r{   z</b>r}   r+   )r   r~   r<   r   )	ZautrQ   r   r   r"  r   Zfirst_namesZfirst_shortr   r'   r'   r(   r    s*    

r  c              	   C   s   d}dddddddd	g}d
dd
d
d
dddg}t dd}| jD ]}| j| d dksf| j| d dkr>t|D ]X\}}t| j| | s| j| | dkrqn||||  t| j| |  d  qn|| q>q>W 5 Q R X d S )NrC   r^   r   r   r\   r   rT   r   r3   rF   r.   z			r6   Zdatabase_newrD   regularletterr   r,   )r   r   rZ   r~   r   r   rG   rJ   )r%   rK   r"   distr!   r&   r   contr'   r'   r(   write_corrected_database_short  s    
$&*r.  )N)ra   )F)ra   )r  )0pandasr   numpyr   r   namedentitiesr   r   r%  IPython.displayr   r   r   Zpybliometrics.scopusr   Zfp.fpr   Zcrossref.restfulr   difflibr   r)   r@   r   rB   rO   rS   r`   r[   rg   r_   rp   ro   r   r   r   r   r   r  r  r  r#  r   r  r&  r  r  r.  r'   r'   r'   r(   <module>   sL   M
-
$".
	
l)
>		