[PATCH v2] fetch: Support GLEP 75 mirror structure

classic Classic list List threaded Threaded
3 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH v2] fetch: Support GLEP 75 mirror structure

Michał Górny-5
Add support for the subset of GLEP 75 needed by Gentoo Infra.  This
includes fetching and parsing layout.conf, and support for flat layout
and filename-hash layout with cutoffs being multiples of 4.

Bug: https://bugs.gentoo.org/646898
Signed-off-by: Michał Górny <[hidden email]>
---
 lib/portage/package/ebuild/fetch.py | 139 +++++++++++++++++++++++++++-
 1 file changed, 135 insertions(+), 4 deletions(-)

Changes in v2: switched to a more classy layout to make the code
reusable in emirrordist.

diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
index 227bf45ae..18e3d390a 100644
--- a/lib/portage/package/ebuild/fetch.py
+++ b/lib/portage/package/ebuild/fetch.py
@@ -7,12 +7,15 @@ __all__ = ['fetch']
 
 import errno
 import io
+import itertools
+import json
 import logging
 import random
 import re
 import stat
 import sys
 import tempfile
+import time
 
 from collections import OrderedDict
 
@@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
  'portage.package.ebuild.doebuild:doebuild_environment,' + \
  '_doebuild_spawn',
  'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
+ 'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
+ 'portage.util._urlopen:urlopen',
 )
 
 from portage import os, selinux, shutil, _encodings, \
  _movefile, _shell_quote, _unicode_encode
 from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
- _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
+ _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
+ checksum_str)
 from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
- GLOBAL_CONFIG_PATH
+ GLOBAL_CONFIG_PATH, CACHE_PATH
 from portage.data import portage_gid, portage_uid, secpass, userpriv_groups
 from portage.exception import FileNotFound, OperationNotPermitted, \
  PortageException, TryAgain
@@ -253,6 +259,130 @@ _size_suffix_map = {
  'Y' : 80,
 }
 
+
+class FlatLayout(object):
+ def get_path(self, filename):
+ return filename
+
+
+class FilenameHashLayout(object):
+ def __init__(self, algo, cutoffs):
+ self.algo = algo
+ self.cutoffs = [int(x) for x in cutoffs.split(':')]
+
+ def get_path(self, filename):
+ fnhash = checksum_str(filename.encode('utf8'), self.algo)
+ ret = ''
+ for c in self.cutoffs:
+ assert c % 4 == 0
+ c = c // 4
+ ret += fnhash[:c] + '/'
+ fnhash = fnhash[c:]
+ return ret + filename
+
+
+class MirrorLayoutConfig(object):
+ """
+ Class to read layout.conf from a mirror.
+ """
+
+ def __init__(self):
+ self.structure = ()
+
+ def read_from_file(self, f):
+ cp = SafeConfigParser()
+ read_configs(cp, [f])
+ vals = []
+ for i in itertools.count():
+ try:
+ vals.append(tuple(cp.get('structure', '%d' % i).split()))
+ except NoOptionError:
+ break
+ self.structure = tuple(vals)
+
+ def serialize(self):
+ return self.structure
+
+ def deserialize(self, data):
+ self.structure = data
+
+ @staticmethod
+ def validate_structure(val):
+ if val == ('flat',):
+ return True
+ if val[0] == 'filename-hash' and len(val) == 3:
+ if val[1] not in get_valid_checksum_keys():
+ return False
+ # validate cutoffs
+ for c in val[2].split(':'):
+ try:
+ c = int(c)
+ except ValueError:
+ break
+ else:
+ if c % 4 != 0:
+ break
+ else:
+ return True
+ return False
+ return False
+
+ def get_best_supported_layout(self):
+ for val in self.structure:
+ if self.validate_structure(val):
+ if val[0] == 'flat':
+ return FlatLayout()
+ elif val[0] == 'filename-hash':
+ return FilenameHashLayout(val[1], val[2])
+ else:
+ # fallback
+ return FlatLayout()
+
+
+def get_mirror_url(mirror_url, filename, eroot):
+ """
+ Get correct fetch URL for a given file, accounting for mirror
+ layout configuration.
+
+ @param mirror_url: Base URL to the mirror (without '/distfiles')
+ @param filename: Filename to fetch
+ @param eroot: EROOT to use for the cache file
+ @return: Full URL to fetch
+ """
+
+ mirror_conf = MirrorLayoutConfig()
+
+ cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
+ try:
+ with open(cache_file, 'r') as f:
+ cache = json.load(f)
+ except (IOError, ValueError):
+ cache = {}
+
+ ts, data = cache.get(mirror_url, (0, None))
+ # refresh at least daily
+ if ts >= time.time() - 86400:
+ mirror_conf.deserialize(data)
+ else:
+ try:
+ f = urlopen(mirror_url + '/distfiles/layout.conf')
+ try:
+ data = io.StringIO(f.read().decode('utf8'))
+ finally:
+ f.close()
+
+ mirror_conf.read_from_file(data)
+ except IOError:
+ pass
+
+ cache[mirror_url] = (time.time(), mirror_conf.serialize())
+ with open(cache_file, 'w') as f:
+ json.dump(cache, f)
+
+ return (mirror_url + "/distfiles/" +
+ mirror_conf.get_best_supported_layout().get_path(filename))
+
+
 def fetch(myuris, mysettings, listonly=0, fetchonly=0,
  locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
  allow_missing_digests=True):
@@ -434,8 +564,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
  for myfile, myuri in file_uri_tuples:
  if myfile not in filedict:
  filedict[myfile]=[]
- for y in range(0,len(locations)):
- filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
+ for l in locations:
+ filedict[myfile].append(get_mirror_url(l, myfile,
+ mysettings["EROOT"]))
  if myuri is None:
  continue
  if myuri[:9]=="mirror://":
--
2.23.0


Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v2] fetch: Support GLEP 75 mirror structure

Alec Warner-2


On Thu, Oct 3, 2019 at 9:37 AM Michał Górny <[hidden email]> wrote:
Add support for the subset of GLEP 75 needed by Gentoo Infra.  This
includes fetching and parsing layout.conf, and support for flat layout
and filename-hash layout with cutoffs being multiples of 4.

Bug: https://bugs.gentoo.org/646898
Signed-off-by: Michał Górny <[hidden email]>
---
 lib/portage/package/ebuild/fetch.py | 139 +++++++++++++++++++++++++++-
 1 file changed, 135 insertions(+), 4 deletions(-)

Changes in v2: switched to a more classy layout to make the code
reusable in emirrordist.

diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
index 227bf45ae..18e3d390a 100644
--- a/lib/portage/package/ebuild/fetch.py
+++ b/lib/portage/package/ebuild/fetch.py
@@ -7,12 +7,15 @@ __all__ = ['fetch']

 import errno
 import io
+import itertools
+import json
 import logging
 import random
 import re
 import stat
 import sys
 import tempfile
+import time

 from collections import OrderedDict

@@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
        'portage.package.ebuild.doebuild:doebuild_environment,' + \
                '_doebuild_spawn',
        'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
+       'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
+       'portage.util._urlopen:urlopen',
 )

 from portage import os, selinux, shutil, _encodings, \
        _movefile, _shell_quote, _unicode_encode
 from portage.checksum import (get_valid_checksum_keys, perform_md5, verify_all,
-       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
+       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
+       checksum_str)
 from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
-       GLOBAL_CONFIG_PATH
+       GLOBAL_CONFIG_PATH, CACHE_PATH
 from portage.data import portage_gid, portage_uid, secpass, userpriv_groups
 from portage.exception import FileNotFound, OperationNotPermitted, \
        PortageException, TryAgain
@@ -253,6 +259,130 @@ _size_suffix_map = {
        'Y' : 80,
 }

+
+class FlatLayout(object):
+       def get_path(self, filename):
+               return filename
+
+
+class FilenameHashLayout(object):
+       def __init__(self, algo, cutoffs):
+               self.algo = algo
+               self.cutoffs = [int(x) for x in cutoffs.split(':')]
+
+       def get_path(self, filename):
+               fnhash = checksum_str(filename.encode('utf8'), self.algo)
+               ret = ''
+               for c in self.cutoffs:
+                       assert c % 4 == 0

I'm not quite sure what this assert is doing. I'm not super in favor of asserts (I'd rather see an exception like raise FooError("...")), but if you are going to use it, please use something like:

assert c % 4 == 0, "Some description of why we put this assert here so if it fires we can do something useful."

+                       c = c // 4
+                       ret += fnhash[:c] + '/'
+                       fnhash = fnhash[c:]
+               return ret + filename
+
+
+class MirrorLayoutConfig(object):
+       """
+       Class to read layout.conf from a mirror.
+       """
+
+       def __init__(self):
+               self.structure = ()
+
+       def read_from_file(self, f):
+               cp = SafeConfigParser()
+               read_configs(cp, [f])
+               vals = []
+               for i in itertools.count():
+                       try:
+                               vals.append(tuple(cp.get('structure', '%d' % i).split()))
+                       except NoOptionError:
+                               break
+               self.structure = tuple(vals)
+
+       def serialize(self):
+               return self.structure
+
+       def deserialize(self, data):
+               self.structure = data
+
+       @staticmethod
+       def validate_structure(val):
+               if val == ('flat',):
+                       return True
+               if val[0] == 'filename-hash' and len(val) == 3:
+                       if val[1] not in get_valid_checksum_keys():
+                               return False
+                       # validate cutoffs
+                       for c in val[2].split(':'):
+                               try:
+                                       c = int(c)
+                               except ValueError:
+                                       break
+                               else:
+                                       if c % 4 != 0:
+                                               break
+                       else:
+                               return True
+                       return False
+               return False
+
+       def get_best_supported_layout(self):
+               for val in self.structure:
+                       if self.validate_structure(val):
+                               if val[0] == 'flat':
+                                       return FlatLayout()
+                               elif val[0] == 'filename-hash':
+                                       return FilenameHashLayout(val[1], val[2])
+               else:
+                       # fallback
+                       return FlatLayout()
+
+
+def get_mirror_url(mirror_url, filename, eroot):
+       """
+       Get correct fetch URL for a given file, accounting for mirror
+       layout configuration.
+
+       @param mirror_url: Base URL to the mirror (without '/distfiles')
+       @param filename: Filename to fetch
+       @param eroot: EROOT to use for the cache file
+       @return: Full URL to fetch
+       """
+
+       mirror_conf = MirrorLayoutConfig()
+
+       cache_file = os.path.join(eroot, CACHE_PATH, 'mirror-metadata.json')
+       try:
+               with open(cache_file, 'r') as f:
+                       cache = json.load(f)
+       except (IOError, ValueError):
+               cache = {}
+
+       ts, data = cache.get(mirror_url, (0, None))
+       # refresh at least daily
+       if ts >= time.time() - 86400:
+               mirror_conf.deserialize(data)
+       else:
+               try:
+                       f = urlopen(mirror_url + '/distfiles/layout.conf')
+                       try:
+                               data = io.StringIO(f.read().decode('utf8'))
+                       finally:
+                               f.close()
+
+                       mirror_conf.read_from_file(data)
+               except IOError:
+                       pass
+
+               cache[mirror_url] = (time.time(), mirror_conf.serialize())
+               with open(cache_file, 'w') as f:
+                       json.dump(cache, f)
+
+       return (mirror_url + "/distfiles/" +
+                       mirror_conf.get_best_supported_layout().get_path(filename))
+
+
 def fetch(myuris, mysettings, listonly=0, fetchonly=0,
        locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
        allow_missing_digests=True):
@@ -434,8 +564,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
        for myfile, myuri in file_uri_tuples:
                if myfile not in filedict:
                        filedict[myfile]=[]
-                       for y in range(0,len(locations)):
-                               filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
+                       for l in locations:
+                               filedict[myfile].append(get_mirror_url(l, myfile,
+                                               mysettings["EROOT"]))
                if myuri is None:
                        continue
                if myuri[:9]=="mirror://":
--
2.23.0


Reply | Threaded
Open this post in threaded view
|

Re: [PATCH v2] fetch: Support GLEP 75 mirror structure

Michał Górny-5
On Thu, 2019-10-03 at 22:01 -0700, Alec Warner wrote:

> On Thu, Oct 3, 2019 at 9:37 AM Michał Górny <[hidden email]> wrote:
>
> > Add support for the subset of GLEP 75 needed by Gentoo Infra.  This
> > includes fetching and parsing layout.conf, and support for flat layout
> > and filename-hash layout with cutoffs being multiples of 4.
> >
> > Bug: https://bugs.gentoo.org/646898
> > Signed-off-by: Michał Górny <[hidden email]>
> > ---
> >  lib/portage/package/ebuild/fetch.py | 139 +++++++++++++++++++++++++++-
> >  1 file changed, 135 insertions(+), 4 deletions(-)
> >
> > Changes in v2: switched to a more classy layout to make the code
> > reusable in emirrordist.
> >
> > diff --git a/lib/portage/package/ebuild/fetch.py
> > b/lib/portage/package/ebuild/fetch.py
> > index 227bf45ae..18e3d390a 100644
> > --- a/lib/portage/package/ebuild/fetch.py
> > +++ b/lib/portage/package/ebuild/fetch.py
> > @@ -7,12 +7,15 @@ __all__ = ['fetch']
> >
> >  import errno
> >  import io
> > +import itertools
> > +import json
> >  import logging
> >  import random
> >  import re
> >  import stat
> >  import sys
> >  import tempfile
> > +import time
> >
> >  from collections import OrderedDict
> >
> > @@ -27,14 +30,17 @@ portage.proxy.lazyimport.lazyimport(globals(),
> >         'portage.package.ebuild.doebuild:doebuild_environment,' + \
> >                 '_doebuild_spawn',
> >         'portage.package.ebuild.prepare_build_dirs:prepare_build_dirs',
> > +
> >  'portage.util.configparser:SafeConfigParser,read_configs,NoOptionError',
> > +       'portage.util._urlopen:urlopen',
> >  )
> >
> >  from portage import os, selinux, shutil, _encodings, \
> >         _movefile, _shell_quote, _unicode_encode
> >  from portage.checksum import (get_valid_checksum_keys, perform_md5,
> > verify_all,
> > -       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter)
> > +       _filter_unaccelarated_hashes, _hash_filter, _apply_hash_filter,
> > +       checksum_str)
> >  from portage.const import BASH_BINARY, CUSTOM_MIRRORS_FILE, \
> > -       GLOBAL_CONFIG_PATH
> > +       GLOBAL_CONFIG_PATH, CACHE_PATH
> >  from portage.data import portage_gid, portage_uid, secpass,
> > userpriv_groups
> >  from portage.exception import FileNotFound, OperationNotPermitted, \
> >         PortageException, TryAgain
> > @@ -253,6 +259,130 @@ _size_suffix_map = {
> >         'Y' : 80,
> >  }
> >
> > +
> > +class FlatLayout(object):
> > +       def get_path(self, filename):
> > +               return filename
> > +
> > +
> > +class FilenameHashLayout(object):
> > +       def __init__(self, algo, cutoffs):
> > +               self.algo = algo
> > +               self.cutoffs = [int(x) for x in cutoffs.split(':')]
> > +
> > +       def get_path(self, filename):
> > +               fnhash = checksum_str(filename.encode('utf8'), self.algo)
> > +               ret = ''
> > +               for c in self.cutoffs:
> > +                       assert c % 4 == 0
> >
>
> I'm not quite sure what this assert is doing. I'm not super in favor of
> asserts (I'd rather see an exception like raise FooError("...")), but if you
> are going to use it, please use something like:
>
> assert c % 4 == 0, "Some description of why we put this assert here so if it
> fires we can do something useful."
It's already checked in validate_structure().  Maybe I could delegate
that check to this class to make things clearer.

>
> +                       c = c // 4
> > +                       ret += fnhash[:c] + '/'
> > +                       fnhash = fnhash[c:]
> > +               return ret + filename
> > +
> > +
> > +class MirrorLayoutConfig(object):
> > +       """
> > +       Class to read layout.conf from a mirror.
> > +       """
> > +
> > +       def __init__(self):
> > +               self.structure = ()
> > +
> > +       def read_from_file(self, f):
> > +               cp = SafeConfigParser()
> > +               read_configs(cp, [f])
> > +               vals = []
> > +               for i in itertools.count():
> > +                       try:
> > +                               vals.append(tuple(cp.get('structure', '%d'
> > % i).split()))
> > +                       except NoOptionError:
> > +                               break
> > +               self.structure = tuple(vals)
> > +
> > +       def serialize(self):
> > +               return self.structure
> > +
> > +       def deserialize(self, data):
> > +               self.structure = data
> > +
> > +       @staticmethod
> > +       def validate_structure(val):
> > +               if val == ('flat',):
> > +                       return True
> > +               if val[0] == 'filename-hash' and len(val) == 3:
> > +                       if val[1] not in get_valid_checksum_keys():
> > +                               return False
> > +                       # validate cutoffs
> > +                       for c in val[2].split(':'):
> > +                               try:
> > +                                       c = int(c)
> > +                               except ValueError:
> > +                                       break
> > +                               else:
> > +                                       if c % 4 != 0:
> > +                                               break
> > +                       else:
> > +                               return True
> > +                       return False
> > +               return False
> > +
> > +       def get_best_supported_layout(self):
> > +               for val in self.structure:
> > +                       if self.validate_structure(val):
> > +                               if val[0] == 'flat':
> > +                                       return FlatLayout()
> > +                               elif val[0] == 'filename-hash':
> > +                                       return FilenameHashLayout(val[1],
> > val[2])
> > +               else:
> > +                       # fallback
> > +                       return FlatLayout()
> > +
> > +
> > +def get_mirror_url(mirror_url, filename, eroot):
> > +       """
> > +       Get correct fetch URL for a given file, accounting for mirror
> > +       layout configuration.
> > +
> > +       @param mirror_url: Base URL to the mirror (without '/distfiles')
> > +       @param filename: Filename to fetch
> > +       @param eroot: EROOT to use for the cache file
> > +       @return: Full URL to fetch
> > +       """
> > +
> > +       mirror_conf = MirrorLayoutConfig()
> > +
> > +       cache_file = os.path.join(eroot, CACHE_PATH,
> > 'mirror-metadata.json')
> > +       try:
> > +               with open(cache_file, 'r') as f:
> > +                       cache = json.load(f)
> > +       except (IOError, ValueError):
> > +               cache = {}
> > +
> > +       ts, data = cache.get(mirror_url, (0, None))
> > +       # refresh at least daily
> > +       if ts >= time.time() - 86400:
> > +               mirror_conf.deserialize(data)
> > +       else:
> > +               try:
> > +                       f = urlopen(mirror_url + '/distfiles/layout.conf')
> > +                       try:
> > +                               data = io.StringIO(f.read().decode('utf8'))
> > +                       finally:
> > +                               f.close()
> > +
> > +                       mirror_conf.read_from_file(data)
> > +               except IOError:
> > +                       pass
> > +
> > +               cache[mirror_url] = (time.time(), mirror_conf.serialize())
> > +               with open(cache_file, 'w') as f:
> > +                       json.dump(cache, f)
> > +
> > +       return (mirror_url + "/distfiles/" +
> > +
> >  mirror_conf.get_best_supported_layout().get_path(filename))
> > +
> > +
> >  def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> >         locks_in_subdir=".locks", use_locks=1, try_mirrors=1, digests=None,
> >         allow_missing_digests=True):
> > @@ -434,8 +564,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> >         for myfile, myuri in file_uri_tuples:
> >                 if myfile not in filedict:
> >                         filedict[myfile]=[]
> > -                       for y in range(0,len(locations)):
> > -
> >  filedict[myfile].append(locations[y]+"/distfiles/"+myfile)
> > +                       for l in locations:
> > +                               filedict[myfile].append(get_mirror_url(l,
> > myfile,
> > +                                               mysettings["EROOT"]))
> >                 if myuri is None:
> >                         continue
> >                 if myuri[:9]=="mirror://":
> > --
> > 2.23.0
> >
> >
> >
--
Best regards,
Michał Górny


signature.asc (631 bytes) Download Attachment