o S=h/#@sddlZddlZddlmZddlmZddZddZdd Zd d Z d d Z ddZ ddZ ddZ ddZddZddZddZddZddZdS) N)jsonifySKIPPED_OBJ_ATTRIBUTEScCs|dddS)NX)versionrrL/home/cmiss/Jenkins/workspace/SPARC-API-DEV/app/scicrunch_process_results.py_convert_patch_to_xsr c Csg}|dd}t|D]\}}z|dddd}t|}Wn ty+d}Ynwd|dd }td |}t|d }t|d } t||} t| d | d <|d| d<d|d vrpdd|ddD| d<ng| d<z|dddd| d<Wn tyd| d<Ynwz |ddd| d<Wn tyd| d<Ynwt | d| | | d| d=| | q |S)Nhits_sourceitemrkeyword undefinedscicrunch_processing_v_._app.ZATTRIBUTES_MAPsort_files_by_mime_typedoiZtookobjectscSs6g|]}d|vr|ddddkr|ddqS)Zadditional_mimetypenamezabi.context-informationrdatasetpath)find).0filerrr $s  z$_prepare_results..zabi-contextual-informationreadme descriptionrtitlefiles) enumerater KeyErrorreplace importlib import_modulegetattr_transform_attributes_convert_doi_to_urlkeys _remove_unused_files_informationupdateappend) resultsoutputr ihitrpackage_versionmZattributes_maprattrrrr _prepare_results sH               r6cCs.|sdS|D]}tD] }||vr||=q qdS)Nr)Zobj_listobjkeyrrr r,Asr,cCst|ddt|dS)Nr total)Z numberOfHitsr/)rr6)r/rrr process_resultsKsr:c Cst|}|D]Q}d|vrWt|ddkrWz7|dddd}d}t|ddkr3|dddd}|d}|d}|d }t|||||d WStyVYdSwqdS) Nzabi-scaffold-metadata-filerrrr zabi-context-fileZdataset_identifierZdataset_versions3uri)ridrr;Z contextinfo)r6lenrr$)r/resultrZ context_infor<rr;rrr process_get_first_scaffold_infoPs" r?cCs8g}|dd}t|D] \}}||dq d|iS)Nr r r>)r#r.)r/processed_outputsr r1r2rrr reform_anatomy_resultses  rAc Csg}t|}|D]5}z |d}t|}Wn tyd}Ynwd|dd}td|}t|d}|||qd|iS) Nrrrrrrprocess_resultr>)r6r r$r%r&r'r(r.)r/r@Z kb_resultsZ kb_resultrr3r4rBrrr reform_dataset_resultsns   rCcCsg}d|vr |d}|S)N aggregationsr)r/Zprocessed_resultsrrr reform_aggregation_resultssrEcCs|s|S|ddS)NzDOI:zhttps://doi.org/)r%)rrrr r*s r*c Csri}|D]0\}}|d}d}t|D]\}}t|tr1||vr1||}|dt|ur1|}q|||<q|S)Nr F)itemsr# isinstancedictr+r=) Z attributes_rZ found_attrkr5subsetZkey_attrnr8rrr r)s   r)cCsrd|vr7d|vr7|dD]*}|dd}t|d|}d}d|vr&t|d|}|dur.||d<|dur6||d<q |S)N scaffoldszabi-scaffold-filerr<zabi-scaffold-thumbnailZ meta_file thumbnail)_extract_dataset_path_remote_id)r0scaffoldid_Zscaffold_meta_fileZscaffold_thumbnailrrr _manipulate_attrs    rRcCsRd}||D] }|dd|kr&d}d|vr|d}|dd|d}|Sq|S)Nrr<r identifierr)r remote_idr)datar8rQZextracted_dataZdataset_path_remote_idrTrrr rOs   rOc Csddgii}i}|dddD];}z0d}d}t||d}|r&|d }d }d}t||d}|r9|d }|rA|rA|||<WqtyKYqw|D]}|||d } |dd| qN|S) NuberonarrayrDnames_and_curiesbucketsz curie=(.*?),r r8rFz name=(.*?),r<r)researchgroupr$r.) rUr> id_name_mapr patterncuriematchrr8pairrrr reform_curies_resultss:   rccCsddgii}i}d|vr|dD] }|d||d<qntd|vr?|dD]}|d||dd}|dd|q&|St) NrVrWnodesZlblr<edgesr7rZ) BaseExceptionr.)rUr>r^r rbrrr reform_related_termss$   rg)r&r[flaskrZapp.scicrunch_processing_commonrr r6r,r:r?rArCrEr*r)rRrOrcrgrrrr s$  4   )