import json
import unittest
import requests

from algoliasearch.search_client import SearchClient
from urllib.parse import urljoin

from tests.config import Config


# Elasticsearch request body: a hits-free search (size 0) whose composite
# aggregation returns up to 1000 unique DOI curies from the
# 'item.curie.aggregate' field, starting from the beginning (empty 'after' key).
SCICRUNCH_DOI_AGGREGATION = {
    "from": 0,
    "size": 0,  # no document hits needed, only the aggregation buckets
    "aggregations": {
        "doi": {
            "composite": {
                "size": 1000,  # page size of the composite aggregation
                "sources": [
                    {
                        "curie": {"terms": {"field": "item.curie.aggregate"}}
                    }
                ],
                "after": {"curie": ""}
            }
        }
    }
}

def checkResult(client, result1, result2, name_doi_map, name):
    """Assert that every DOI in ``result1`` appears (as ``doi:<doi>``) in ``result2``.

    Prints a human-readable report of any datasets that could not be found,
    then fails the test via ``client.assertEqual`` if the not-found list is
    non-empty.

    :param client: a ``unittest.TestCase``-like object providing ``assertEqual``.
    :param result1: iterable of bare DOI strings to look up.
    :param result2: container of ``doi:``-prefixed curies (the reference set).
    :param name_doi_map: maps each DOI in ``result1`` to ``{'name': ..., 'id': ...}``.
    :param name: label used in the printed report and the assertion message.
    """
    # Build a set once so each membership test is O(1); result2 may be a
    # large list of curies.
    reference = set(result2)
    not_found_doi = [doi for doi in result1 if f'doi:{doi}' not in reference]

    if not_found_doi:
        print(f"{name}: Not found datasets report:")
        for doi in not_found_doi:
            print(f"  {name_doi_map[doi]['id']} - {doi} - {name_doi_map[doi]['name']}")
    # Can everything in discover be found on SciCrunch?
    client.assertEqual([], not_found_doi, name)


class ComparisonTestCase(unittest.TestCase):
    """Consistency checks between the SciCrunch index and other SPARC sources.

    The constructor fetches the full list of dataset DOI curies from the
    SciCrunch Elasticsearch endpoint once per test instance; each test then
    gathers DOIs from a second source (Pennsieve discover, Algolia) and
    asserts that every one of them is present in SciCrunch.
    """

    def __init__(self, *args, **kwds):
        super().__init__(*args, **kwds)
        self.__scicrunch_doi = []
        headers = {'accept': 'application/json'}
        params = {'api_key': Config.SCICRUNCH_API_KEY}

        # Trailing '/' so urljoin() appends '_search' instead of replacing
        # the last path segment of the host URL.
        scicrunch_host = Config.SCICRUNCH_API_HOST + '/'

        scicrunch_response = requests.post(urljoin(scicrunch_host, '_search'), json=SCICRUNCH_DOI_AGGREGATION, params=params, headers=headers)
        self.assertEqual(200, scicrunch_response.status_code)

        # NOTE(review): the composite aggregation's 'after' key is never
        # advanced, so at most the first 1000 curies are collected — confirm
        # the index holds fewer distinct DOIs than the aggregation page size.
        json_data = scicrunch_response.json()
        aggregations = json_data['aggregations']
        buckets = aggregations['doi']['buckets']
        for bucket in buckets:
            self.__scicrunch_doi.append(bucket['key']['curie'])

    def test_doi_information(self):
        """Every SPARC-organisation Pennsieve dataset DOI must exist in SciCrunch."""
        pennsieve_host = Config.PENNSIEVE_API_HOST + '/'

        scicrunch_doi = self.__scicrunch_doi

        headers = {'accept': 'application/json'}
        # limit=0 returns no dataset records but still reports totalCount.
        params = {'limit': 0, 'embargo': False}
        find_total_response = requests.get(urljoin(pennsieve_host, 'datasets'), params=params, headers=headers)
        self.assertEqual(200, find_total_response.status_code)

        # Collect the ids of datasets belonging to the SPARC organisation so
        # non-SPARC discover datasets can be filtered out below; a set gives
        # O(1) membership tests in the per-dataset loop.
        test_response = requests.get(urljoin(pennsieve_host, f'organizations/{Config.SPARC_PENNSIEVE_ORGANISATION_ID}/datasets/metrics'), headers=headers)
        json_data = test_response.json()
        datasets = json_data['datasets']
        sparc_dataset_ids = {dataset['id'] for dataset in datasets}

        json_data = find_total_response.json()
        total_count = json_data['totalCount']

        # Break the call to datasets into multiple chunks as one single call
        # seems to be breaking the tests.
        remain = total_count
        offset = 0
        count = 100
        discover_doi = []
        name_doi_map = {}
        while remain > 0:
            if count > remain:
                # Final page: request exactly the number of datasets left.
                # (Bug fix: the original zeroed `remain` before assigning
                # `count = remain`, so the last partial page was requested
                # with limit=0 and its datasets were silently dropped.)
                count = remain
            remain = remain - count

            params = {'limit': count, 'offset': offset, 'embargo': False}
            response = requests.get(urljoin(pennsieve_host, 'datasets'), params=params, headers=headers)
            self.assertEqual(200, response.status_code)
            json_data = response.json()
            self.assertEqual(count, len(json_data['datasets']))
            for dataset in json_data['datasets']:
                if dataset['id'] in sparc_dataset_ids:
                    discover_doi.append(dataset['doi'])
                    name_doi_map[dataset['doi']] = {'name': dataset['name'], 'id': dataset['id']}
            offset = offset + count
        # Intentionally not asserting total_count == len(discover_doi):
        # discover also contains datasets outside the SPARC organisation.
        # self.assertEqual(total_count, len(discover_doi))

        checkResult(self, discover_doi, scicrunch_doi, name_doi_map, 'Pennsieve vs SciCrunch')

    def test_aloglia_information(self):
        """Every DOI in the Algolia index must exist in SciCrunch.

        NOTE(review): method name has a typo ('aloglia'); kept as-is so the
        test id stays stable for any CI filters that reference it.
        """
        scicrunch_doi = self.__scicrunch_doi

        client = SearchClient.create(Config.ALGOLIA_ID, Config.ALGOLIA_KEY)
        index = client.init_index(Config.ALGOLIA_INDEX)

        # Empty query string matches every record; retrieve only the fields
        # needed for the DOI comparison and the not-found report.
        res = index.search('', {
            'attributesToRetrieve': [
                'item.curie',
                'item.name'
            ],
            'attributesToHighlight': [],
            'length': 1000,
            'offset': 0
        })

        # A 'status' key in the response indicates an Algolia error payload.
        self.assertFalse('status' in res)
        self.assertTrue('hits' in res)

        client.close()

        algolia_doi = []
        name_doi_map = {}
        for item in res['hits']:
            # Algolia stores curies as 'DOI:10.x/...'; strip the prefix so the
            # bare DOI matches the 'doi:' lookup format used by checkResult.
            doi = item['item']['curie'].replace('DOI:', '', 1)
            algolia_doi.append(doi)
            name_doi_map[doi] = {'name': item['item']['name'], 'id': item['objectID']}

        checkResult(self, algolia_doi, scicrunch_doi, name_doi_map, 'Algolia vs SciCrunch')

# Allow running this module directly (e.g. `python <file>.py`) in addition
# to discovery via a test runner.
if __name__ == '__main__':
    unittest.main()