diff --git a/direct_test/servicex/__init__.py b/direct_test/direct_test/servicex/__init__.py similarity index 100% rename from direct_test/servicex/__init__.py rename to direct_test/direct_test/servicex/__init__.py diff --git a/direct_test/servicex/did_finder/__init__.py b/direct_test/direct_test/servicex/did_finder/__init__.py similarity index 100% rename from direct_test/servicex/did_finder/__init__.py rename to direct_test/direct_test/servicex/did_finder/__init__.py diff --git a/direct_test/servicex/did_finder/lookup_request.py b/direct_test/direct_test/servicex/did_finder/lookup_request.py similarity index 87% rename from direct_test/servicex/did_finder/lookup_request.py rename to direct_test/direct_test/servicex/did_finder/lookup_request.py index ee312d6..e14cf40 100644 --- a/direct_test/servicex/did_finder/lookup_request.py +++ b/direct_test/direct_test/servicex/did_finder/lookup_request.py @@ -20,19 +20,21 @@ def lookup_files(self): ds_size = 0 total_paths = 0 + avg_replicas = 0 for af in all_files: ds_size += af['file_size'] total_paths += len(af['file_path']) if self.prefix: af['file_path'] = [self.prefix+fp for fp in af['file_path']] - + if len(all_files): + avg_replicas = float(total_paths)/len(all_files) print( f"Dataset contains {len(all_files)} files.", f"Lookup took {str(lookup_finish-lookup_start)}", { 'requestId': self.request_id, 'size': ds_size, - 'avg_replicas': float(total_paths)/len(all_files) + 'avg_replicas': avg_replicas }) return all_files diff --git a/direct_test/servicex/did_finder/rucio_adapter.py b/direct_test/direct_test/servicex/did_finder/rucio_adapter.py similarity index 74% rename from direct_test/servicex/did_finder/rucio_adapter.py rename to direct_test/direct_test/servicex/did_finder/rucio_adapter.py index e639f22..c06e625 100644 --- a/direct_test/servicex/did_finder/rucio_adapter.py +++ b/direct_test/direct_test/servicex/did_finder/rucio_adapter.py @@ -1,5 +1,4 @@ import xmltodict -from rucio.common.exception import DataIdentifierNotFound class RucioAdapter: @@ -39,15 +38,15 @@ def list_files_for_did(self, did): together with checksum and filesize. """ parsed_did = self.parse_did(did) - try: - reps = self.replica_client.list_replicas( - [{'scope': parsed_did['scope'], 'name': parsed_did['name']}], - schemes=['root'], - metalink=True, - sort='geoip' - ) - g_files = [] - d = xmltodict.parse(reps) + reps = self.replica_client.list_replicas( + [{'scope': parsed_did['scope'], 'name': parsed_did['name']}], + schemes=['root'], + metalink=True, + sort='geoip' + ) + g_files = [] + d = xmltodict.parse(reps) + if 'file' in d['metalink']: for f in d['metalink']['file']: g_files.append( { @@ -57,7 +56,4 @@ def list_files_for_did(self, did): 'file_path': self.get_paths(f['url']) } ) - return g_files - except DataIdentifierNotFound: - self.logger.warning(f"{did} not found") - return None + return g_files diff --git a/direct_test/test1.py b/direct_test/direct_test/test1.py similarity index 76% rename from direct_test/test1.py rename to direct_test/direct_test/test1.py index e4fb781..fd2ed3a 100644 --- a/direct_test/test1.py +++ b/direct_test/direct_test/test1.py @@ -12,6 +12,17 @@ replica_client = ReplicaClient() rucio_adapter = RucioAdapter(replica_client) + +scope = 'dataset' +name = 'unexisting' +ds = scope+':'+name +lookup_request = LookupRequest( + did=ds, + rucio_adapter=rucio_adapter, + request_id='request-id' +) +lookup_request.lookup_files() + scope = 'data15_13TeV' name = 'data15_13TeV.00283429.physics_Main.deriv.DAOD_PHYS.r9264_p3083_p4165_tid21568817_00' ds = scope+':'+name @@ -20,5 +31,4 @@ rucio_adapter=rucio_adapter, request_id='request-id' ) - lookup_request.lookup_files() diff --git a/src/servicex/did_finder/lookup_request.py b/src/servicex/did_finder/lookup_request.py index 0021ecc..34aeae9 100644 --- a/src/servicex/did_finder/lookup_request.py +++ b/src/servicex/did_finder/lookup_request.py @@ -61,11 +61,10 @@ async def lookup_files(self): lookup_start = datetime.now() all_files = self.rucio_adapter.list_files_for_did(self.did) lookup_finish = datetime.now() - if not all_files: - raise Exception(f'DID not found {self.did}') ds_size = 0 total_paths = 0 + avg_replicas = 0 for af in all_files: ds_size += af['file_size'] total_paths += len(af['file_path']) @@ -77,12 +76,15 @@ async def lookup_files(self): """ af['file_path'] = af['file_path'][0] + if len(all_files): + avg_replicas = float(total_paths)/len(all_files) + self.logger.info(f"Dataset contains {len(all_files)} files. " + f"Lookup took {str(lookup_finish-lookup_start)}", extra={ 'requestId': self.request_id, 'size': ds_size, - 'avg_replicas': float(total_paths)/len(all_files) + 'avg_replicas': avg_replicas }) return all_files diff --git a/src/servicex/did_finder/rucio_adapter.py b/src/servicex/did_finder/rucio_adapter.py index 5dd440f..643a19c 100644 --- a/src/servicex/did_finder/rucio_adapter.py +++ b/src/servicex/did_finder/rucio_adapter.py @@ -29,8 +29,6 @@ import logging import xmltodict -from rucio.common.exception import DataIdentifierNotFound - class RucioAdapter: def __init__(self, replica_client): @@ -73,15 +71,15 @@ def list_files_for_did(self, did): together with checksum and filesize. """ parsed_did = self.parse_did(did) - try: - reps = self.replica_client.list_replicas( - [{'scope': parsed_did['scope'], 'name': parsed_did['name']}], - schemes=['root'], - metalink=True, - sort='geoip' - ) - g_files = [] - d = xmltodict.parse(reps) + reps = self.replica_client.list_replicas( + [{'scope': parsed_did['scope'], 'name': parsed_did['name']}], + schemes=['root'], + metalink=True, + sort='geoip' + ) + g_files = [] + d = xmltodict.parse(reps) + if 'file' in d['metalink']: for f in d['metalink']['file']: g_files.append( { @@ -91,7 +89,4 @@ def list_files_for_did(self, did): 'file_path': self.get_paths(f['url']) } ) - return g_files - except DataIdentifierNotFound: - self.logger.warning(f"{did} not found") - return None + return g_files diff --git a/tests/did_finder/test_lookup_request.py b/tests/did_finder/test_lookup_request.py index a435310..85219dd 100644 --- a/tests/did_finder/test_lookup_request.py +++ b/tests/did_finder/test_lookup_request.py @@ -38,60 +38,39 @@ def test_init(self, mocker): assert request.rucio_adapter == mock_rucio assert request.did == 'my-did' - @pytest - def test_lookup_files(self, mocker): - 'Good lookup, chunk size is same as file size' + def test_lookup_files(self, mocker): + 'Good lookup, chunk size is same as file size' + mock_rucio = mocker.MagicMock(RucioAdapter) + rucio_file_list = [ + { + "scope": "my-scope", + "name": "file"+str(i), + "bytes": 31400, + "events": 5000 + } for i in range(10) + ] + + mock_rucio.list_files_for_did.return_value = rucio_file_list + + mock_rucio.find_replicas.return_value = [ + { + 'adler32': 21231, + 'bytes': 1122233344, + } + ] + + request = LookupRequest("my-did", mock_rucio) + + assert len(request.lookup_files()) == 10 + + mock_rucio.list_files_for_did.assert_called_with("my-did") + + @pytest.mark.asyncio + async def test_lookup_files_empty_did(self, mocker): + 'Make sure that a DID with zero files correctly returns zero files' mock_rucio = mocker.MagicMock(RucioAdapter) - rucio_file_list = [ - { - "scope": "my-scope", - "name": "file"+str(i), - "bytes": 31400, - "events": 5000 - } for i in range(10) - ] + mock_rucio.list_files_for_did.return_value = [] - mock_rucio.list_files_for_did.return_value = rucio_file_list - - mock_rucio.find_replicas.return_value = [ - { - 'adler32': 21231, - 'bytes': 1122233344, - } - ] - - request = LookupRequest( - "my-did", - mock_rucio, - chunk_size=1) - - assert len(request.lookup_files()) == 10 - - mock_rucio.list_files_for_did.assert_called_with("my-did") - - @pytest - def test_lookup_files_empty_did(self, mocker): - 'Make sure that a DID with zero files correctly returns zero files from its iterator' - mock_rucio = mocker.MagicMock(RucioAdapter) - mock_rucio.list_files_for_did.return_value = iter([]) - - request = LookupRequest( - "my-did", - mock_rucio) - - assert len(request.lookup_files()) == 0 - - @pytest - def test_lookup_files_did_not_found(self, mocker): - 'Make sure exception is thrown if DID is not found' - mock_rucio = mocker.MagicMock(RucioAdapter) - mock_rucio.list_files_for_did.return_value = None - - request = LookupRequest( - "my-did", - mock_rucio) - - with pytest.raises(Exception) as e: - request.lookup_files() + request = LookupRequest("my-did", mock_rucio) - assert "DID not found" in str(e) + assert len(await request.lookup_files()) == 0