[PATCH v3] fetch: Support GLEP 75 mirror structure

classic Classic list List threaded Threaded
1 message Options
Reply | Threaded
Open this post in threaded view
|

[PATCH v3] fetch: Support GLEP 75 mirror structure

Michał Górny-5
Add support for the subset of GLEP 75 needed by Gentoo Infra.  This
includes fetching and parsing layout.conf, and support for flat layout
and filename-hash layout with cutoffs being multiples of 4.

Bug: https://bugs.gentoo.org/646898
Signed-off-by: Michał Górny <[hidden email]>
---
 lib/portage/package/ebuild/fetch.py    | 158 ++++++++++++++++++++++++-
 lib/portage/tests/ebuild/test_fetch.py |  94 ++++++++++++++-
 2 files changed, 247 insertions(+), 5 deletions(-)

Changes in v3:
- mirrors are evaluated lazily (i.e. Portage doesn't fetch layouts
  for all mirrors prematurely),
- garbage layout.conf is handled gracefully,
- cache updates are done atomically,
- layout.conf argument verification has been moved to individual classes,
- a few unit and integration tests have been added.

diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
index 227bf45ae..fa250535f 100644
--- a/lib/portage/package/ebuild/fetch.py
+++ b/lib/portage/package/ebuild/fetch.py
@@ -6,13 +6,17 @@ from __future__ import print_function
 __all__ = ['fetch']
 
 import errno
+import functools
 import io
+import itertools
+import json
 import logging
 import random
 import re
 import stat
 import sys
 import tempfile
+import time
 
 from collections import OrderedDict
 
@@ -27,14 +31,19 @@ portage.proxy.lazyimport.lazyimport(globals(),
  'portage.package.ebuild.doebuild:doebuild_environment,' + \
  '_doebuild_spawn',
  'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
+ 'portage.util:atomic_ofstream',
+ 'portage.util.configparser:SafeConfigParser,read_configs,' +
+ 'NoOptionError,ConfigParserError',
+ 'portage.util._urlopen:urlopen',
 )
 
 from portage import os, selinux, shutil, _encodings, \
  _movefile, _shell_quote, _unicode_encode
 from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
- _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
+ _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
+ checksum_str)
 from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
- GLOBAL_CONFIG_PATH
+ GLOBAL_CONFIG_PATH, CACHE_PATH
 from portage.data import portage_gid, portage_uid, secpass, userpriv_groups
 from portage.exception import FileNotFound, OperationNotPermitted, \
  PortageException, TryAgain
@@ -253,6 +262,144 @@ _size_suffix_map = {
  'Y' : 80,
 }
 
+
+class FlatLayout(object):
+ def get_path(self, filename):
+ return filename
+
+ @staticmethod
+ def verify_args(args):
+ return len(args) == 1
+
+
+class FilenameHashLayout(object):
+ def __init__(self, algo, cutoffs):
+ self.algo = algo
+ self.cutoffs = [int(x) for x in cutoffs.split(':')]
+
+ def get_path(self, filename):
+ fnhash = checksum_str(filename.encode('utf8'), self.algo)
+ ret = ''
+ for c in self.cutoffs:
+ assert c % 4 == 0
+ c = c // 4
+ ret += fnhash[:c] + '/'
+ fnhash = fnhash[c:]
+ return ret + filename
+
+ @staticmethod
+ def verify_args(args):
+ if len(args) != 3:
+ return False
+ if args[1] not in get_valid_checksum_keys():
+ return False
+ # validate cutoffs
+ for c in args[2].split(':'):
+ try:
+ c = int(c)
+ except ValueError:
+ break
+ else:
+ if c % 4 != 0:
+ break
+ else:
+ return True
+ return False
+
+
+class MirrorLayoutConfig(object):
+ """
+ Class to read layout.conf from a mirror.
+ """
+
+ def __init__(self):
+ self.structure = ()
+
+ def read_from_file(self, f):
+ cp = SafeConfigParser()
+ read_configs(cp, [f])
+ vals = []
+ for i in itertools.count():
+ try:
+ vals.append(tuple(cp.get('structure', '%d' % i).split()))
+ except NoOptionError:
+ break
+ self.structure = tuple(vals)
+
+ def serialize(self):
+ return self.structure
+
+ def deserialize(self, data):
+ self.structure = data
+
+ @staticmethod
+ def validate_structure(val):
+ if val[0] == 'flat':
+ return FlatLayout.verify_args(val)
+ if val[0] == 'filename-hash':
+ return FilenameHashLayout.verify_args(val)
+ return False
+
+ def get_best_supported_layout(self):
+ for val in self.structure:
+ if self.validate_structure(val):
+ if val[0] == 'flat':
+ return FlatLayout(*val[1:])
+ elif val[0] == 'filename-hash':
+ return FilenameHashLayout(*val[1:])
+ else:
+ # fallback
+ return FlatLayout()
+
+
+def get_mirror_url(mirror_url, filename, eroot):
+ """
+ Get correct fetch URL for a given file, accounting for mirror
+ layout configuration.
+
+ @param mirror_url: Base URL to the mirror (without '/distfiles')
+ @param filename: Filename to fetch
+ @param eroot: EROOT to use for the cache file
+ @return: Full URL to fetch
+ """
+
+ mirror_conf = MirrorLayoutConfig()
+
+ cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
+ try:
+ with open(cache_file, 'r') as f:
+ cache = json.load(f)
+ except (IOError, ValueError):
+ cache = {}
+
+ ts, data = cache.get(mirror_url, (0, None))
+ # refresh at least daily
+ if ts >= time.time() - 86400:
+ mirror_conf.deserialize(data)
+ else:
+ try:
+ f = urlopen(mirror_url + '/distfiles/layout.conf')
+ try:
+ data = io.StringIO(f.read().decode('utf8'))
+ finally:
+ f.close()
+
+ try:
+ mirror_conf.read_from_file(data)
+ except ConfigParserError:
+ pass
+ except IOError:
+ pass
+
+ cache[mirror_url] = (time.time(), mirror_conf.serialize())
+ f = atomic_ofstream(cache_file, 'w')
+ json.dump(cache, f)
+ f.close()
+
+ return (mirror_url + "/distfiles/" +
+ mirror_conf.get_best_supported_layout().get_path(filename))
+
+
 def fetch(myuris, mysettings, listonly=0, fetchonly=0,
  locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
  allow_missing_digests=True):
@@ -434,8 +581,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
  for myfile, myuri in file_uri_tuples:
  if myfile not in filedict:
  filedict[myfile]=[]
- for y in range(0,len(locations)):
- filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
+ for l in locations:
+ filedict[myfile].append(functools.partial(
+ get_mirror_url, l, myfile, mysettings["EROOT"]))
  if myuri is None:
  continue
  if myuri[:9]=="mirror://":
@@ -895,6 +1043,8 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
  tried_locations = set()
  while uri_list:
  loc = uri_list.pop()
+ if isinstance(loc, functools.partial):
+ loc = loc()
  # Eliminate duplicates here in case we've switched to
  # "primaryuri" mode on the fly due to a checksum failure.
  if loc in tried_locations:
diff --git a/lib/portage/tests/ebuild/test_fetch.py b/lib/portage/tests/ebuild/test_fetch.py
index 83321fed7..f2254c468 100644
--- a/lib/portage/tests/ebuild/test_fetch.py
+++ b/lib/portage/tests/ebuild/test_fetch.py
@@ -4,6 +4,7 @@
 from __future__ import unicode_literals
 
 import functools
+import io
 import tempfile
 
 import portage
@@ -11,12 +12,14 @@ from portage import shutil, os
 from portage.tests import TestCase
 from portage.tests.resolver.ResolverPlayground import ResolverPlayground
 from portage.tests.util.test_socks5 import AsyncHTTPServer
+from portage.util.configparser import ConfigParserError
 from portage.util.futures.executor.fork import ForkExecutor
 from portage.util._async.SchedulerInterface import SchedulerInterface
 from portage.util._eventloop.global_event_loop import global_event_loop
 from portage.package.ebuild.config import config
 from portage.package.ebuild.digestgen import digestgen
-from portage.package.ebuild.fetch import _download_suffix
+from portage.package.ebuild.fetch import (_download_suffix, FlatLayout,
+ FilenameHashLayout, MirrorLayoutConfig)
 from _emerge.EbuildFetcher import EbuildFetcher
 from _emerge.Package import Package
 
@@ -228,3 +231,92 @@ class EbuildFetchTestCase(TestCase):
  finally:
  shutil.rmtree(ro_distdir)
  playground.cleanup()
+
+ def test_flat_layout(self):
+ self.assertTrue(FlatLayout.verify_args(('flat',)))
+ self.assertFalse(FlatLayout.verify_args(('flat', 'extraneous-arg')))
+ self.assertEqual(FlatLayout().get_path('foo-1.tar.gz'), 'foo-1.tar.gz')
+
+ def test_filename_hash_layout(self):
+ self.assertFalse(FilenameHashLayout.verify_args(('filename-hash',)))
+ self.assertTrue(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '8')))
+ self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'INVALID-HASH', '8')))
+ self.assertTrue(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '4:8:12')))
+ self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '3')))
+ self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', 'junk')))
+ self.assertFalse(FilenameHashLayout.verify_args(('filename-hash', 'SHA1', '4:8:junk')))
+
+ self.assertEqual(FilenameHashLayout('SHA1', '4').get_path('foo-1.tar.gz'),
+ '1/foo-1.tar.gz')
+ self.assertEqual(FilenameHashLayout('SHA1', '8').get_path('foo-1.tar.gz'),
+ '19/foo-1.tar.gz')
+ self.assertEqual(FilenameHashLayout('SHA1', '8:16').get_path('foo-1.tar.gz'),
+ '19/c3b6/foo-1.tar.gz')
+ self.assertEqual(FilenameHashLayout('SHA1', '8:16:24').get_path('foo-1.tar.gz'),
+ '19/c3b6/37a94b/foo-1.tar.gz')
+
+ def test_mirror_layout_config(self):
+ mlc = MirrorLayoutConfig()
+ self.assertEqual(mlc.serialize(), ())
+ self.assertIsInstance(mlc.get_best_supported_layout(), FlatLayout)
+
+ conf = '''
+[structure]
+0=flat
+'''
+ mlc.read_from_file(io.StringIO(conf))
+ self.assertEqual(mlc.serialize(), (('flat',),))
+ self.assertIsInstance(mlc.get_best_supported_layout(), FlatLayout)
+ self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
+ 'foo-1.tar.gz')
+
+ conf = '''
+[structure]
+0=filename-hash SHA1 8:16
+1=flat
+'''
+ mlc.read_from_file(io.StringIO(conf))
+ self.assertEqual(mlc.serialize(), (
+ ('filename-hash', 'SHA1', '8:16'),
+ ('flat',)
+ ))
+ self.assertIsInstance(mlc.get_best_supported_layout(), FilenameHashLayout)
+ self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
+ '19/c3b6/foo-1.tar.gz')
+ serialized = mlc.serialize()
+
+ # test fallback
+ conf = '''
+[structure]
+0=filename-hash INVALID-HASH 8:16
+1=filename-hash SHA1 32
+2=flat
+'''
+ mlc.read_from_file(io.StringIO(conf))
+ self.assertEqual(mlc.serialize(), (
+ ('filename-hash', 'INVALID-HASH', '8:16'),
+ ('filename-hash', 'SHA1', '32'),
+ ('flat',)
+ ))
+ self.assertIsInstance(mlc.get_best_supported_layout(), FilenameHashLayout)
+ self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
+ '19c3b637/foo-1.tar.gz')
+
+ # test deserialization
+ mlc.deserialize(serialized)
+ self.assertEqual(mlc.serialize(), (
+ ('filename-hash', 'SHA1', '8:16'),
+ ('flat',)
+ ))
+ self.assertIsInstance(mlc.get_best_supported_layout(), FilenameHashLayout)
+ self.assertEqual(mlc.get_best_supported_layout().get_path('foo-1.tar.gz'),
+ '19/c3b6/foo-1.tar.gz')
+
+ # test erroneous input
+ conf = '''
+[#(*DA*&*F
+[structure]
+0=filename-hash SHA1 32
+'''
+ self.assertRaises(ConfigParserError, mlc.read_from_file,
+ io.StringIO(conf))
--
2.23.0