import os
import unittest
import elasticsearch
from topik.fileio.base_output import load_output
from topik.fileio.reader import read_input
from topik.fileio.tests import test_data_path
from topik.fileio.out_elastic import ElasticSearchOutput
from topik.fileio.out_memory import InMemoryOutput
# Shared fixtures for the output tests below.
INDEX = "topik_unittest"               # Elasticsearch index used by the tests
SAVE_FILENAME = "test_save.topikdata"  # file written by the save/load tests
CONTENT_FIELD = "abstract"             # document field the tests read and hash

# Keep the chatty HTTP/Elasticsearch client loggers at ERROR during testing
# so that Travis CI logs stay short.
import logging

logging.basicConfig()
for _noisy_logger in ('elasticsearch', 'urllib3'):
    logging.getLogger(_noisy_logger).setLevel(logging.ERROR)
class BaseOutputTest(object):
    """Test cases shared by every output backend.

    This class is a mixin: concrete test classes combine it with
    ``unittest.TestCase`` and provide a ``setUp`` that assigns
    ``self.test_raw_data`` an output object already loaded with the
    test documents.
    """

    # Output instance under test; assigned by the concrete subclass's setUp.
    test_raw_data = None

    def _remove_saved_file(self):
        """Delete ``SAVE_FILENAME`` if a test left it on disk."""
        if os.path.exists(SAVE_FILENAME):
            os.remove(SAVE_FILENAME)

    def test_get_filtered_data(self):
        """The content field yields 100 items and the first two differ."""
        data = list(self.test_raw_data.get_filtered_data(CONTENT_FIELD))
        self.assertEqual(len(data), 100)
        self.assertNotEqual(data[0], data[1])

    def test_save_file(self):
        """Saving the output creates ``SAVE_FILENAME`` on disk."""
        try:
            self.test_raw_data.save(SAVE_FILENAME)
            self.assertTrue(os.path.exists(SAVE_FILENAME))
        finally:
            # Clean up even when the assertion fails, so a leftover file
            # cannot influence later tests.
            self._remove_saved_file()

    def test_load_file(self):
        """A saved output can be reloaded and still yields 100 items."""
        try:
            self.test_raw_data.save(SAVE_FILENAME)
            self.test_raw_data = load_output(SAVE_FILENAME)
            # Materialize the data before the file is removed below.
            data = list(self.test_raw_data.get_filtered_data(CONTENT_FIELD))
            self.assertEqual(len(data), 100)
        finally:
            # Clean up even when loading or the assertion fails.
            self._remove_saved_file()

    def test_get_date_filtered_data(self):
        """Filtering on ``year`` with start=1975, end=1999 yields 25 items."""
        result_list = list(self.test_raw_data.get_date_filtered_data(
            field_to_get=CONTENT_FIELD,
            start=1975,
            end=1999,
            filter_field="year"))
        self.assertEqual(len(result_list), 25)
        # The first element of each result is compared as an integer;
        # presumably it is the document's hash id -- TODO confirm.
        self.assertIn(-1611117933394825767,
                      [int(item[0]) for item in result_list])
class TestInMemoryOutput(unittest.TestCase, BaseOutputTest):
    """Run the shared output test cases against ``InMemoryOutput``."""

    def setUp(self):
        """Create an ``InMemoryOutput`` and import the JSON-stream test data.

        Documents are read from ``test_data_json_stream.json`` under
        ``test_data_path`` and imported with ``CONTENT_FIELD`` as the
        field to hash.
        """
        self.test_raw_data = InMemoryOutput()
        self.test_raw_data.import_from_iterable(
            read_input(
                '{}/test_data_json_stream.json'.format(test_data_path)),
            field_to_hash=CONTENT_FIELD)
class TestElasticSearchOutput(unittest.TestCase, BaseOutputTest):
    """Run the shared output test cases against ``ElasticSearchOutput``.

    These tests talk to an Elasticsearch server addressed as
    ``localhost``.
    """

    def setUp(self):
        """Create an ``ElasticSearchOutput`` on ``INDEX`` and import the data.

        Documents are read from ``test_data_json_stream.json`` under
        ``test_data_path`` and imported with ``CONTENT_FIELD`` as the
        field to hash.
        """
        self.test_raw_data = ElasticSearchOutput(
            source='localhost',
            index=INDEX,
            # Use the shared constant rather than repeating its value.
            content_field=CONTENT_FIELD,
        )
        self.test_raw_data.import_from_iterable(
            read_input(
                '{}/test_data_json_stream.json'.format(test_data_path)),
            field_to_hash=CONTENT_FIELD)

    def tearDown(self):
        """Delete the indices this test case may have created.

        Both the main test index and the ``<INDEX>_year_alias_date`` index
        are removed only if they exist, so a missing index does not turn
        cleanup into an error.
        """
        instance = elasticsearch.Elasticsearch("localhost")
        index_names = (INDEX, "{}_year_alias_date".format(INDEX))
        for index_name in index_names:
            # Keyword arguments work with both older and newer clients.
            if instance.indices.exists(index=index_name):
                instance.indices.delete(index=index_name)