a EgP#@sddlZddlZddlZddlmZddlmZddlmZddZ ddZ d d Z d d Z d dZ ddZddZddZddZddZddZddZddZdd ZdS)!N)Config)jsonifySKIPPED_OBJ_ATTRIBUTEScCs|dddS)NX)versionrrH/home/cmiss/Jenkins/workspace/SPARC-API/app/scicrunch_process_results.pyconvert_patch_to_X sr c Csg}|dd}t|D]n\}}z |dddd}t|}WntyXd}Yn0d|dd }td |}t|d }t|d } t||} t| d | d <|d| d<d|d vrdd|ddD| d<ng| d<z|dddd| d<Wnty"d| d<Yn0z|ddd| d<WntyXd| d<Yn0t | d| | | d| d=| | q|S)Nhits_sourceitemr keyword undefinedscicrunch_processing_v_._app.ZATTRIBUTES_MAPsort_files_by_mime_typedoiZtookobjectscSs6g|].}d|vr|ddddkr|ddqS)Zadditional_mimetypenamezabi.context-informationrdatasetpath)find).0filerrr $sz$_prepare_results..zabi-contextual-informationreadme descriptionrtitlefiles) enumerater KeyErrorreplace importlib import_modulegetattr_transform_attributes_convert_doi_to_urlkeys _remove_unused_files_informationupdateappend) resultsoutputr ihitr package_versionmZattributes_maprattrrrr _prepare_results sB           r7cCs.|sdS|D]}tD]}||vr||=qq dS)Nr)Zobj_listobjkeyrrr r-As r-cCst|ddt|dS)Nr total)Z numberOfHitsr0)rr7)r0rrr process_resultsKsr;c Cst|}|D]}d|vr t|ddkr zn|dddd}d}t|ddkrf|dddd}|d}|d}|d }t|||||d WStyYdS0q dS) Nzabi-scaffold-metadata-filerrrr!zabi-context-fileZdataset_identifierZdataset_versions3uri)ridr r<Z contextinfo)r7lenrr%)r0resultrZ context_infor=r r<rrr process_get_first_scaffold_infoOs  r@cCs8g}|dd}t|D]\}}||dqd|iS)Nr r r?)r$r/)r0processed_outputsr r2r3rrr reform_anatomy_resultscs  rBc Csg}t|}|D]j}z|d}t|}Wnty>d}Yn0d|dd}td|}t|d}|||qd|iS) Nr rrrrrprocess_resultr?)r7r r%r&r'r(r)r/)r0rAZ kb_resultsZ kb_resultr r4r5rCrrr reform_dataset_resultsls    rDcCsg}d|vr|d}|S)N aggregationsr)r0Zprocessed_resultsrrr reform_aggregation_results~srFcCs|s|S|ddS)NzDOI:zhttps://doi.org/)r&)rrrr r+sr+c Csri}|D]`\}}|d}d}t|D]:\}}t|tr(||vr(||}|dt|ur(|}q(|||<q |S)Nr F)itemsr$ isinstancedictr,r>) Z attributes_rZ found_attrkr6subsetZkey_attrnr9rrr r*s   r*cCsrd|vrnd|vrn|dD]T}|dd}t|d|}d}d|vrLt|d|}|dur\||d<|dur||d<q|S)N scaffoldszabi-scaffold-filerr=zabi-scaffold-thumbnailZ meta_file thumbnail)_extract_dataset_path_remote_id)r1scaffoldid_Zscaffold_meta_fileZscaffold_thumbnailrrr _manipulate_attrs     rScCsPd}||D]>}|dd|kr d}d|vr4|d}|dd|d}qLq |S)Nrr=r! identifierr)r remote_idr)datar9rRZextracted_dataZdataset_path_remote_idrUrrr rPs  rPc Csddgii}i}|dddD]|}z`d}d}t||d}|rL|d }d }d}t||d}|rr|d }|r|r|||<Wq tyYq Yq 0q |D]$}|||d } |dd| q|S) NuberonarrayrEnames_and_curiesbucketsz curie=(.*?),r!r9rGz name=(.*?),r=r)researchgroupr%r/) rVr? id_name_maprpatterncuriematchrr9pairrrr reform_curies_resultss6     rdcCsddgii}i}d|vr8|dD]}|d||d<q ntd|vr||dD],}|d||dd}|dd|qLnt|S) NrWrXnodesZlblr=edgesr8r[) BaseExceptionr/)rVr?r_rrcrrr reform_related_termss"   rh)r'jsonr\ app.configrflaskrZapp.scicrunch_processing_commonrr r7r-r;r@rBrDrFr+r*rSrPrdrhrrrr s&   4  )