Skip to content
This repository has been archived by the owner on Oct 1, 2022. It is now read-only.

Commit

Permalink
fixing tests
Browse files Browse the repository at this point in the history
  • Loading branch information
Ilija Vukotic committed Sep 20, 2021
1 parent ad7efd4 commit 0e71b62
Show file tree
Hide file tree
Showing 8 changed files with 73 additions and 89 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,19 +20,21 @@ def lookup_files(self):

ds_size = 0
total_paths = 0
avg_replicas = 0
for af in all_files:
ds_size += af['file_size']
total_paths += len(af['file_path'])
if self.prefix:
af['file_path'] = [self.prefix+fp for fp in af['file_path']]

if len(all_files):
avg_replicas = float(total_paths)/len(all_files)
print(
f"Dataset contains {len(all_files)} files.",
f"Lookup took {str(lookup_finish-lookup_start)}",
{
'requestId': self.request_id,
'size': ds_size,
'avg_replicas': float(total_paths)/len(all_files)
'avg_replicas': avg_replicas
})

return all_files
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import xmltodict
from rucio.common.exception import DataIdentifierNotFound


class RucioAdapter:
Expand Down Expand Up @@ -39,15 +38,15 @@ def list_files_for_did(self, did):
together with checksum and filesize.
"""
parsed_did = self.parse_did(did)
try:
reps = self.replica_client.list_replicas(
[{'scope': parsed_did['scope'], 'name': parsed_did['name']}],
schemes=['root'],
metalink=True,
sort='geoip'
)
g_files = []
d = xmltodict.parse(reps)
reps = self.replica_client.list_replicas(
[{'scope': parsed_did['scope'], 'name': parsed_did['name']}],
schemes=['root'],
metalink=True,
sort='geoip'
)
g_files = []
d = xmltodict.parse(reps)
if 'file' in d['metalink']:
for f in d['metalink']['file']:
g_files.append(
{
Expand All @@ -57,7 +56,4 @@ def list_files_for_did(self, did):
'file_path': self.get_paths(f['url'])
}
)
return g_files
except DataIdentifierNotFound:
self.logger.warning(f"{did} not found")
return None
return g_files
12 changes: 11 additions & 1 deletion direct_test/test1.py → direct_test/direct_test/test1.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,17 @@

replica_client = ReplicaClient()
rucio_adapter = RucioAdapter(replica_client)

scope = 'dataset'
name = 'unexisting'
ds = scope+':'+name
lookup_request = LookupRequest(
did=ds,
rucio_adapter=rucio_adapter,
request_id='request-id'
)
lookup_request.lookup_files()

scope = 'data15_13TeV'
name = 'data15_13TeV.00283429.physics_Main.deriv.DAOD_PHYS.r9264_p3083_p4165_tid21568817_00'
ds = scope+':'+name
Expand All @@ -20,5 +31,4 @@
rucio_adapter=rucio_adapter,
request_id='request-id'
)

lookup_request.lookup_files()
8 changes: 5 additions & 3 deletions src/servicex/did_finder/lookup_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,11 +61,10 @@ async def lookup_files(self):
lookup_start = datetime.now()
all_files = self.rucio_adapter.list_files_for_did(self.did)
lookup_finish = datetime.now()
if not all_files:
raise Exception(f'DID not found {self.did}')

ds_size = 0
total_paths = 0
avg_replicas = 0
for af in all_files:
ds_size += af['file_size']
total_paths += len(af['file_path'])
Expand All @@ -77,12 +76,15 @@ async def lookup_files(self):
"""
af['file_path'] = af['file_path'][0]

if len(all_files):
avg_replicas = float(total_paths)/len(all_files)

self.logger.info(f"Dataset contains {len(all_files)} files. " +
f"Lookup took {str(lookup_finish-lookup_start)}",
extra={
'requestId': self.request_id,
'size': ds_size,
'avg_replicas': float(total_paths)/len(all_files)
'avg_replicas': avg_replicas
})

return all_files
25 changes: 10 additions & 15 deletions src/servicex/did_finder/rucio_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,6 @@
import logging
import xmltodict

from rucio.common.exception import DataIdentifierNotFound


class RucioAdapter:
def __init__(self, replica_client):
Expand Down Expand Up @@ -73,15 +71,15 @@ def list_files_for_did(self, did):
together with checksum and filesize.
"""
parsed_did = self.parse_did(did)
try:
reps = self.replica_client.list_replicas(
[{'scope': parsed_did['scope'], 'name': parsed_did['name']}],
schemes=['root'],
metalink=True,
sort='geoip'
)
g_files = []
d = xmltodict.parse(reps)
reps = self.replica_client.list_replicas(
[{'scope': parsed_did['scope'], 'name': parsed_did['name']}],
schemes=['root'],
metalink=True,
sort='geoip'
)
g_files = []
d = xmltodict.parse(reps)
if 'file' in d['metalink']:
for f in d['metalink']['file']:
g_files.append(
{
Expand All @@ -91,7 +89,4 @@ def list_files_for_did(self, did):
'file_path': self.get_paths(f['url'])
}
)
return g_files
except DataIdentifierNotFound:
self.logger.warning(f"{did} not found")
return None
return g_files
87 changes: 33 additions & 54 deletions tests/did_finder/test_lookup_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,60 +38,39 @@ def test_init(self, mocker):
assert request.rucio_adapter == mock_rucio
assert request.did == 'my-did'

@pytest
def test_lookup_files(self, mocker):
'Good lookup, chunk size is same as file size'
def test_lookup_files(self, mocker):
'Good lookup, chunk size is same as file size'
mock_rucio = mocker.MagicMock(RucioAdapter)
rucio_file_list = [
{
"scope": "my-scope",
"name": "file"+str(i),
"bytes": 31400,
"events": 5000
} for i in range(10)
]

mock_rucio.list_files_for_did.return_value = rucio_file_list

mock_rucio.find_replicas.return_value = [
{
'adler32': 21231,
'bytes': 1122233344,
}
]

request = LookupRequest("my-did", mock_rucio)

assert len(request.lookup_files()) == 10

mock_rucio.list_files_for_did.assert_called_with("my-did")

@pytest.mark.asyncio
async def test_lookup_files_empty_did(self, mocker):
'Make sure that a DID with zero files correctly returns zero files'
mock_rucio = mocker.MagicMock(RucioAdapter)
rucio_file_list = [
{
"scope": "my-scope",
"name": "file"+str(i),
"bytes": 31400,
"events": 5000
} for i in range(10)
]
mock_rucio.list_files_for_did.return_value = []

mock_rucio.list_files_for_did.return_value = rucio_file_list

mock_rucio.find_replicas.return_value = [
{
'adler32': 21231,
'bytes': 1122233344,
}
]

request = LookupRequest(
"my-did",
mock_rucio,
chunk_size=1)

assert len(request.lookup_files()) == 10

mock_rucio.list_files_for_did.assert_called_with("my-did")

@pytest
def test_lookup_files_empty_did(self, mocker):
'Make sure that a DID with zero files correctly returns zero files from its iterator'
mock_rucio = mocker.MagicMock(RucioAdapter)
mock_rucio.list_files_for_did.return_value = iter([])

request = LookupRequest(
"my-did",
mock_rucio)

assert len(request.lookup_files()) == 0

@pytest
def test_lookup_files_did_not_found(self, mocker):
'Make sure exception is thrown if DID is not found'
mock_rucio = mocker.MagicMock(RucioAdapter)
mock_rucio.list_files_for_did.return_value = None

request = LookupRequest(
"my-did",
mock_rucio)

with pytest.raises(Exception) as e:
request.lookup_files()
request = LookupRequest("my-did", mock_rucio)

assert "DID not found" in str(e)
assert len(await request.lookup_files()) == 0

0 comments on commit 0e71b62

Please sign in to comment.