[PATCH] Improve handling of percent-signs in SRC_URI

classic Classic list List threaded Threaded
12 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH] Improve handling of percent-signs in SRC_URI

Mike Gilbert-2
Unquote the URL basename when fetching from upstream.
Quote the filename when fetching from mirrors.

Bug: https://bugs.gentoo.org/719810
Signed-off-by: Mike Gilbert <[hidden email]>
---
 lib/portage/dbapi/porttree.py       | 7 ++++++-
 lib/portage/package/ebuild/fetch.py | 9 +++++++--
 2 files changed, 13 insertions(+), 3 deletions(-)

diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
index 08af17bcd..984263039 100644
--- a/lib/portage/dbapi/porttree.py
+++ b/lib/portage/dbapi/porttree.py
@@ -55,6 +55,11 @@ try:
 except ImportError:
  from urlparse import urlparse
 
+try:
+ from urllib.parse import unquote as urlunquote
+except ImportError:
+ from urllib import unquote as urlunquote
+
 if sys.hexversion >= 0x3000000:
  # pylint: disable=W0622
  basestring = str
@@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
  myuris.pop()
  distfile = myuris.pop()
  else:
- distfile = os.path.basename(uri)
+ distfile = urlunquote(os.path.basename(uri))
  if not distfile:
  raise portage.exception.InvalidDependString(
  ("getFetchMap(): '%s' SRC_URI has no file " + \
diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
index 28e7caf53..47c3ad28f 100644
--- a/lib/portage/package/ebuild/fetch.py
+++ b/lib/portage/package/ebuild/fetch.py
@@ -26,6 +26,11 @@ try:
 except ImportError:
  from urlparse import urlparse
 
+try:
+ from urllib.parse import quote as urlquote
+except ImportError:
+ from urllib import quote as urlquote
+
 import portage
 portage.proxy.lazyimport.lazyimport(globals(),
  'portage.package.ebuild.config:check_config_instance,config',
@@ -351,7 +356,7 @@ _size_suffix_map = {
 
 class FlatLayout(object):
  def get_path(self, filename):
- return filename
+ return urlquote(filename)
 
  def get_filenames(self, distdir):
  for dirpath, dirnames, filenames in os.walk(distdir,
@@ -382,7 +387,7 @@ class FilenameHashLayout(object):
  c = c // 4
  ret += fnhash[:c] + '/'
  fnhash = fnhash[c:]
- return ret + filename
+ return ret + urlquote(filename)
 
  def get_filenames(self, distdir):
  pattern = ''
--
2.27.0.rc2


Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Improve handling of percent-signs in SRC_URI

Mike Gilbert-2
On Sun, May 31, 2020 at 9:17 AM Mike Gilbert <[hidden email]> wrote:
>
> Unquote the URL basename when fetching from upstream.

I ran "repoman manifest" for the entire Gentoo repo, and this affected
one package that had a space encoded as "%20" in SRC_URI.

Fixed with this commit:
https://gitweb.gentoo.org/repo/gentoo.git/commit/?id=f5467ab161757777fe9095f0b5a0a1c8e56e4250

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Improve handling of percent-signs in SRC_URI

Zac Medico-2
In reply to this post by Mike Gilbert-2
On 5/31/20 6:17 AM, Mike Gilbert wrote:

> Unquote the URL basename when fetching from upstream.
> Quote the filename when fetching from mirrors.
>
> Bug: https://bugs.gentoo.org/719810
> Signed-off-by: Mike Gilbert <[hidden email]>
> ---
>  lib/portage/dbapi/porttree.py       | 7 ++++++-
>  lib/portage/package/ebuild/fetch.py | 9 +++++++--
>  2 files changed, 13 insertions(+), 3 deletions(-)
>
> diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
> index 08af17bcd..984263039 100644
> --- a/lib/portage/dbapi/porttree.py
> +++ b/lib/portage/dbapi/porttree.py
> @@ -55,6 +55,11 @@ try:
>  except ImportError:
>   from urlparse import urlparse
>  
> +try:
> + from urllib.parse import unquote as urlunquote
> +except ImportError:
> + from urllib import unquote as urlunquote
> +
>  if sys.hexversion >= 0x3000000:
>   # pylint: disable=W0622
>   basestring = str
> @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
>   myuris.pop()
>   distfile = myuris.pop()
>   else:
> - distfile = os.path.basename(uri)
> + distfile = urlunquote(os.path.basename(uri))
>   if not distfile:
>   raise portage.exception.InvalidDependString(
>   ("getFetchMap(): '%s' SRC_URI has no file " + \
> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
> index 28e7caf53..47c3ad28f 100644
> --- a/lib/portage/package/ebuild/fetch.py
> +++ b/lib/portage/package/ebuild/fetch.py
> @@ -26,6 +26,11 @@ try:
>  except ImportError:
>   from urlparse import urlparse
>  
> +try:
> + from urllib.parse import quote as urlquote
> +except ImportError:
> + from urllib import quote as urlquote
> +
>  import portage
>  portage.proxy.lazyimport.lazyimport(globals(),
>   'portage.package.ebuild.config:check_config_instance,config',
> @@ -351,7 +356,7 @@ _size_suffix_map = {
>  
>  class FlatLayout(object):
>   def get_path(self, filename):
> - return filename
> + return urlquote(filename)
>  
>   def get_filenames(self, distdir):
>   for dirpath, dirnames, filenames in os.walk(distdir,
> @@ -382,7 +387,7 @@ class FilenameHashLayout(object):
>   c = c // 4
>   ret += fnhash[:c] + '/'
>   fnhash = fnhash[c:]
> - return ret + filename
> + return ret + urlquote(filename)
>  
>   def get_filenames(self, distdir):
>   pattern = ''
>
We've also got these other basename calls in fetch.py:

> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
> index 28e7caf53..56b375d58 100644
> --- a/lib/portage/package/ebuild/fetch.py
> +++ b/lib/portage/package/ebuild/fetch.py
> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>         else:
>                 for myuri in myuris:
>                         if urlparse(myuri).scheme:
> -                               file_uri_tuples.append((os.path.basename(myuri), myuri))
> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
>                         else:
> -                               file_uri_tuples.append((os.path.basename(myuri), None))
> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))
The case with no scheme is not a URI, so we need to decide whether
or not to unquote, and we should make _parse_uri_map behavior
consistent for this case.

>  
>         filedict = OrderedDict()
>         primaryuri_dict = {}
> @@ -1229,7 +1229,7 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>                                                 "information about how to\n!!! correctly specify "
>                                                 "FETCHCOMMAND and RESUMECOMMAND.\n"),
>                                                 level=logging.ERROR, noiselevel=-1)
> -                                       if myfile != os.path.basename(loc):
> +                                       if myfile != urlunquote(os.path.basename(loc)):
>                                                 return 0
>  
>                                 if not can_fetch:

--
Thanks,
Zac


signature.asc (1000 bytes) Download Attachment
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Improve handling of percent-signs in SRC_URI

Mike Gilbert-2
On Sun, May 31, 2020 at 2:01 PM Zac Medico <[hidden email]> wrote:

>
> On 5/31/20 6:17 AM, Mike Gilbert wrote:
> > Unquote the URL basename when fetching from upstream.
> > Quote the filename when fetching from mirrors.
> >
> > Bug: https://bugs.gentoo.org/719810
> > Signed-off-by: Mike Gilbert <[hidden email]>
> > ---
> >  lib/portage/dbapi/porttree.py       | 7 ++++++-
> >  lib/portage/package/ebuild/fetch.py | 9 +++++++--
> >  2 files changed, 13 insertions(+), 3 deletions(-)
> >
> > diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
> > index 08af17bcd..984263039 100644
> > --- a/lib/portage/dbapi/porttree.py
> > +++ b/lib/portage/dbapi/porttree.py
> > @@ -55,6 +55,11 @@ try:
> >  except ImportError:
> >       from urlparse import urlparse
> >
> > +try:
> > +     from urllib.parse import unquote as urlunquote
> > +except ImportError:
> > +     from urllib import unquote as urlunquote
> > +
> >  if sys.hexversion >= 0x3000000:
> >       # pylint: disable=W0622
> >       basestring = str
> > @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
> >                       myuris.pop()
> >                       distfile = myuris.pop()
> >               else:
> > -                     distfile = os.path.basename(uri)
> > +                     distfile = urlunquote(os.path.basename(uri))
> >                       if not distfile:
> >                               raise portage.exception.InvalidDependString(
> >                                       ("getFetchMap(): '%s' SRC_URI has no file " + \
> > diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
> > index 28e7caf53..47c3ad28f 100644
> > --- a/lib/portage/package/ebuild/fetch.py
> > +++ b/lib/portage/package/ebuild/fetch.py
> > @@ -26,6 +26,11 @@ try:
> >  except ImportError:
> >       from urlparse import urlparse
> >
> > +try:
> > +     from urllib.parse import quote as urlquote
> > +except ImportError:
> > +     from urllib import quote as urlquote
> > +
> >  import portage
> >  portage.proxy.lazyimport.lazyimport(globals(),
> >       'portage.package.ebuild.config:check_config_instance,config',
> > @@ -351,7 +356,7 @@ _size_suffix_map = {
> >
> >  class FlatLayout(object):
> >       def get_path(self, filename):
> > -             return filename
> > +             return urlquote(filename)
> >
> >       def get_filenames(self, distdir):
> >               for dirpath, dirnames, filenames in os.walk(distdir,
> > @@ -382,7 +387,7 @@ class FilenameHashLayout(object):
> >                       c = c // 4
> >                       ret += fnhash[:c] + '/'
> >                       fnhash = fnhash[c:]
> > -             return ret + filename
> > +             return ret + urlquote(filename)
> >
> >       def get_filenames(self, distdir):
> >               pattern = ''
> >
>
> We've also got these other basename calls in fetch.py:
>
> > diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
> > index 28e7caf53..56b375d58 100644
> > --- a/lib/portage/package/ebuild/fetch.py
> > +++ b/lib/portage/package/ebuild/fetch.py
> > @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> >         else:
> >                 for myuri in myuris:
> >                         if urlparse(myuri).scheme:
> > -                               file_uri_tuples.append((os.path.basename(myuri), myuri))
> > +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
> >                         else:
> > -                               file_uri_tuples.append((os.path.basename(myuri), None))
> > +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))
>
> The case with no scheme is not a URI, so we need to decide whether
> or not to unquote, and we should make _parse_uri_map behavior
> consistent for this case.

Backing up, I think unquoting the basename is really a separate issue
from quoting the filename in Layout.get_path(). The latter fixes a
known bug when fetching from mirrors, whereas the former seems like a
cosmetic issue. I don't think we should mix these two up into the same
commit.

Could I get your approval to push my original patch (Escape
percent-signs in filename when fetching from mirrors)?

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Improve handling of percent-signs in SRC_URI

Zac Medico-2
On 5/31/20 12:21 PM, Mike Gilbert wrote:

> On Sun, May 31, 2020 at 2:01 PM Zac Medico <[hidden email]> wrote:
>>
>> On 5/31/20 6:17 AM, Mike Gilbert wrote:
>>> Unquote the URL basename when fetching from upstream.
>>> Quote the filename when fetching from mirrors.
>>>
>>> Bug: https://bugs.gentoo.org/719810
>>> Signed-off-by: Mike Gilbert <[hidden email]>
>>> ---
>>>  lib/portage/dbapi/porttree.py       | 7 ++++++-
>>>  lib/portage/package/ebuild/fetch.py | 9 +++++++--
>>>  2 files changed, 13 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
>>> index 08af17bcd..984263039 100644
>>> --- a/lib/portage/dbapi/porttree.py
>>> +++ b/lib/portage/dbapi/porttree.py
>>> @@ -55,6 +55,11 @@ try:
>>>  except ImportError:
>>>       from urlparse import urlparse
>>>
>>> +try:
>>> +     from urllib.parse import unquote as urlunquote
>>> +except ImportError:
>>> +     from urllib import unquote as urlunquote
>>> +
>>>  if sys.hexversion >= 0x3000000:
>>>       # pylint: disable=W0622
>>>       basestring = str
>>> @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
>>>                       myuris.pop()
>>>                       distfile = myuris.pop()
>>>               else:
>>> -                     distfile = os.path.basename(uri)
>>> +                     distfile = urlunquote(os.path.basename(uri))
>>>                       if not distfile:
>>>                               raise portage.exception.InvalidDependString(
>>>                                       ("getFetchMap(): '%s' SRC_URI has no file " + \
>>> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
>>> index 28e7caf53..47c3ad28f 100644
>>> --- a/lib/portage/package/ebuild/fetch.py
>>> +++ b/lib/portage/package/ebuild/fetch.py
>>> @@ -26,6 +26,11 @@ try:
>>>  except ImportError:
>>>       from urlparse import urlparse
>>>
>>> +try:
>>> +     from urllib.parse import quote as urlquote
>>> +except ImportError:
>>> +     from urllib import quote as urlquote
>>> +
>>>  import portage
>>>  portage.proxy.lazyimport.lazyimport(globals(),
>>>       'portage.package.ebuild.config:check_config_instance,config',
>>> @@ -351,7 +356,7 @@ _size_suffix_map = {
>>>
>>>  class FlatLayout(object):
>>>       def get_path(self, filename):
>>> -             return filename
>>> +             return urlquote(filename)
>>>
>>>       def get_filenames(self, distdir):
>>>               for dirpath, dirnames, filenames in os.walk(distdir,
>>> @@ -382,7 +387,7 @@ class FilenameHashLayout(object):
>>>                       c = c // 4
>>>                       ret += fnhash[:c] + '/'
>>>                       fnhash = fnhash[c:]
>>> -             return ret + filename
>>> +             return ret + urlquote(filename)
>>>
>>>       def get_filenames(self, distdir):
>>>               pattern = ''
>>>
>>
>> We've also got these other basename calls in fetch.py:
>>
>>> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
>>> index 28e7caf53..56b375d58 100644
>>> --- a/lib/portage/package/ebuild/fetch.py
>>> +++ b/lib/portage/package/ebuild/fetch.py
>>> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>>>         else:
>>>                 for myuri in myuris:
>>>                         if urlparse(myuri).scheme:
>>> -                               file_uri_tuples.append((os.path.basename(myuri), myuri))
>>> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
>>>                         else:
>>> -                               file_uri_tuples.append((os.path.basename(myuri), None))
>>> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))
>>
>> The case with no scheme is not a URI, so we need to decide whether
>> or not to unquote, and we should make _parse_uri_map behavior
>> consistent for this case.
>
> Backing up, I think unquoting the basename is really a separate issue
> from quoting the filename in Layout.get_path(). The latter fixes a
> known bug when fetching from mirrors, whereas the former seems like a
> cosmetic issue. I don't think we should mix these two up into the same
> commit.
>
> Could I get your approval to push my original patch (Escape
> percent-signs in filename when fetching from mirrors)?
Separate patches are fine, but both patches really should be merged at
the same time since the quoting and unquoting are inherently coupled.
--
Thanks,
Zac


signature.asc (1000 bytes) Download Attachment
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Improve handling of percent-signs in SRC_URI

Mike Gilbert-2
On Sun, May 31, 2020 at 3:36 PM Zac Medico <[hidden email]> wrote:

>
> On 5/31/20 12:21 PM, Mike Gilbert wrote:
> > On Sun, May 31, 2020 at 2:01 PM Zac Medico <[hidden email]> wrote:
> >>
> >> On 5/31/20 6:17 AM, Mike Gilbert wrote:
> >>> Unquote the URL basename when fetching from upstream.
> >>> Quote the filename when fetching from mirrors.
> >>>
> >>> Bug: https://bugs.gentoo.org/719810
> >>> Signed-off-by: Mike Gilbert <[hidden email]>
> >>> ---
> >>>  lib/portage/dbapi/porttree.py       | 7 ++++++-
> >>>  lib/portage/package/ebuild/fetch.py | 9 +++++++--
> >>>  2 files changed, 13 insertions(+), 3 deletions(-)
> >>>
> >>> diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
> >>> index 08af17bcd..984263039 100644
> >>> --- a/lib/portage/dbapi/porttree.py
> >>> +++ b/lib/portage/dbapi/porttree.py
> >>> @@ -55,6 +55,11 @@ try:
> >>>  except ImportError:
> >>>       from urlparse import urlparse
> >>>
> >>> +try:
> >>> +     from urllib.parse import unquote as urlunquote
> >>> +except ImportError:
> >>> +     from urllib import unquote as urlunquote
> >>> +
> >>>  if sys.hexversion >= 0x3000000:
> >>>       # pylint: disable=W0622
> >>>       basestring = str
> >>> @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
> >>>                       myuris.pop()
> >>>                       distfile = myuris.pop()
> >>>               else:
> >>> -                     distfile = os.path.basename(uri)
> >>> +                     distfile = urlunquote(os.path.basename(uri))
> >>>                       if not distfile:
> >>>                               raise portage.exception.InvalidDependString(
> >>>                                       ("getFetchMap(): '%s' SRC_URI has no file " + \
> >>> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
> >>> index 28e7caf53..47c3ad28f 100644
> >>> --- a/lib/portage/package/ebuild/fetch.py
> >>> +++ b/lib/portage/package/ebuild/fetch.py
> >>> @@ -26,6 +26,11 @@ try:
> >>>  except ImportError:
> >>>       from urlparse import urlparse
> >>>
> >>> +try:
> >>> +     from urllib.parse import quote as urlquote
> >>> +except ImportError:
> >>> +     from urllib import quote as urlquote
> >>> +
> >>>  import portage
> >>>  portage.proxy.lazyimport.lazyimport(globals(),
> >>>       'portage.package.ebuild.config:check_config_instance,config',
> >>> @@ -351,7 +356,7 @@ _size_suffix_map = {
> >>>
> >>>  class FlatLayout(object):
> >>>       def get_path(self, filename):
> >>> -             return filename
> >>> +             return urlquote(filename)
> >>>
> >>>       def get_filenames(self, distdir):
> >>>               for dirpath, dirnames, filenames in os.walk(distdir,
> >>> @@ -382,7 +387,7 @@ class FilenameHashLayout(object):
> >>>                       c = c // 4
> >>>                       ret += fnhash[:c] + '/'
> >>>                       fnhash = fnhash[c:]
> >>> -             return ret + filename
> >>> +             return ret + urlquote(filename)
> >>>
> >>>       def get_filenames(self, distdir):
> >>>               pattern = ''
> >>>
> >>
> >> We've also got these other basename calls in fetch.py:
> >>
> >>> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
> >>> index 28e7caf53..56b375d58 100644
> >>> --- a/lib/portage/package/ebuild/fetch.py
> >>> +++ b/lib/portage/package/ebuild/fetch.py
> >>> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> >>>         else:
> >>>                 for myuri in myuris:
> >>>                         if urlparse(myuri).scheme:
> >>> -                               file_uri_tuples.append((os.path.basename(myuri), myuri))
> >>> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
> >>>                         else:
> >>> -                               file_uri_tuples.append((os.path.basename(myuri), None))
> >>> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))
> >>
> >> The case with no scheme is not a URI, so we need to decide whether
> >> or not to unquote, and we should make _parse_uri_map behavior
> >> consistent for this case.
> >
> > Backing up, I think unquoting the basename is really a separate issue
> > from quoting the filename in Layout.get_path(). The latter fixes a
> > known bug when fetching from mirrors, whereas the former seems like a
> > cosmetic issue. I don't think we should mix these two up into the same
> > commit.
> >
> > Could I get your approval to push my original patch (Escape
> > percent-signs in filename when fetching from mirrors)?
>
> Separate patches are fine, but both patches really should be merged at
> the same time since the quoting and unquoting are inherently coupled.

I think that they are not actually coupled at all. It's two sets of
code used in different contexts.

My original patch could and should be pushed independently of any
subsequent changes.

The unquoting code in this subsequent discussion only affects how a
URL is translated to a filename on disk when fetching from upstream
and generating the Manifest. Ebuild developers can always override
this translation with an arrow operator in SRC_URI, as happens in
go-module.eclass.

The quoting code deals with escaping whatever on-disk filename portage
happens to be using (based on the URL basename or the right-hand side
of the SRC_URI arrow) so that it can be fetched from a mirror. It
doesn't matter if that disk filename has some ugly percent signs or
not, so long as we can send an appropriate request to the mirror web
server.

If you have a counter-example where things will fail with my original
patch, please elaborate.

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Improve handling of percent-signs in SRC_URI

Zac Medico-2
On 5/31/20 12:51 PM, Mike Gilbert wrote:

> On Sun, May 31, 2020 at 3:36 PM Zac Medico <[hidden email]> wrote:
>>
>> On 5/31/20 12:21 PM, Mike Gilbert wrote:
>>> On Sun, May 31, 2020 at 2:01 PM Zac Medico <[hidden email]> wrote:
>>>>
>>>> On 5/31/20 6:17 AM, Mike Gilbert wrote:
>>>>> Unquote the URL basename when fetching from upstream.
>>>>> Quote the filename when fetching from mirrors.
>>>>>
>>>>> Bug: https://bugs.gentoo.org/719810
>>>>> Signed-off-by: Mike Gilbert <[hidden email]>
>>>>> ---
>>>>>  lib/portage/dbapi/porttree.py       | 7 ++++++-
>>>>>  lib/portage/package/ebuild/fetch.py | 9 +++++++--
>>>>>  2 files changed, 13 insertions(+), 3 deletions(-)
>>>>>
>>>>> diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
>>>>> index 08af17bcd..984263039 100644
>>>>> --- a/lib/portage/dbapi/porttree.py
>>>>> +++ b/lib/portage/dbapi/porttree.py
>>>>> @@ -55,6 +55,11 @@ try:
>>>>>  except ImportError:
>>>>>       from urlparse import urlparse
>>>>>
>>>>> +try:
>>>>> +     from urllib.parse import unquote as urlunquote
>>>>> +except ImportError:
>>>>> +     from urllib import unquote as urlunquote
>>>>> +
>>>>>  if sys.hexversion >= 0x3000000:
>>>>>       # pylint: disable=W0622
>>>>>       basestring = str
>>>>> @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
>>>>>                       myuris.pop()
>>>>>                       distfile = myuris.pop()
>>>>>               else:
>>>>> -                     distfile = os.path.basename(uri)
>>>>> +                     distfile = urlunquote(os.path.basename(uri))
>>>>>                       if not distfile:
>>>>>                               raise portage.exception.InvalidDependString(
>>>>>                                       ("getFetchMap(): '%s' SRC_URI has no file " + \
>>>>> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
>>>>> index 28e7caf53..47c3ad28f 100644
>>>>> --- a/lib/portage/package/ebuild/fetch.py
>>>>> +++ b/lib/portage/package/ebuild/fetch.py
>>>>> @@ -26,6 +26,11 @@ try:
>>>>>  except ImportError:
>>>>>       from urlparse import urlparse
>>>>>
>>>>> +try:
>>>>> +     from urllib.parse import quote as urlquote
>>>>> +except ImportError:
>>>>> +     from urllib import quote as urlquote
>>>>> +
>>>>>  import portage
>>>>>  portage.proxy.lazyimport.lazyimport(globals(),
>>>>>       'portage.package.ebuild.config:check_config_instance,config',
>>>>> @@ -351,7 +356,7 @@ _size_suffix_map = {
>>>>>
>>>>>  class FlatLayout(object):
>>>>>       def get_path(self, filename):
>>>>> -             return filename
>>>>> +             return urlquote(filename)
>>>>>
>>>>>       def get_filenames(self, distdir):
>>>>>               for dirpath, dirnames, filenames in os.walk(distdir,
>>>>> @@ -382,7 +387,7 @@ class FilenameHashLayout(object):
>>>>>                       c = c // 4
>>>>>                       ret += fnhash[:c] + '/'
>>>>>                       fnhash = fnhash[c:]
>>>>> -             return ret + filename
>>>>> +             return ret + urlquote(filename)
>>>>>
>>>>>       def get_filenames(self, distdir):
>>>>>               pattern = ''
>>>>>
>>>>
>>>> We've also got these other basename calls in fetch.py:
>>>>
>>>>> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
>>>>> index 28e7caf53..56b375d58 100644
>>>>> --- a/lib/portage/package/ebuild/fetch.py
>>>>> +++ b/lib/portage/package/ebuild/fetch.py
>>>>> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>>>>>         else:
>>>>>                 for myuri in myuris:
>>>>>                         if urlparse(myuri).scheme:
>>>>> -                               file_uri_tuples.append((os.path.basename(myuri), myuri))
>>>>> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
>>>>>                         else:
>>>>> -                               file_uri_tuples.append((os.path.basename(myuri), None))
>>>>> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))
>>>>
>>>> The case with no scheme is not a URI, so we need to decide whether
>>>> or not to unquote, and we should make _parse_uri_map behavior
>>>> consistent for this case.
>>>
>>> Backing up, I think unquoting the basename is really a separate issue
>>> from quoting the filename in Layout.get_path(). The latter fixes a
>>> known bug when fetching from mirrors, whereas the former seems like a
>>> cosmetic issue. I don't think we should mix these two up into the same
>>> commit.
>>>
>>> Could I get your approval to push my original patch (Escape
>>> percent-signs in filename when fetching from mirrors)?
>>
>> Separate patches are fine, but both patches really should be merged at
>> the same time since the quoting and unquoting are inherently coupled.
>
> I think that they are not actually coupled at all. It's two sets of
> code used in different contexts.
>
> My original patch could and should be pushed independently of any
> subsequent changes.
>
> The unquoting code in this subsequent discussion only affects how a
> URL is translated to a filename on disk when fetching from upstream
> and generating the Manifest. Ebuild developers can always override
> this translation with an arrow operator in SRC_URI, as happens in
> go-module.eclass.
>
> The quoting code deals with escaping whatever on-disk filename portage
> happens to be using (based on the URL basename or the right-hand side
> of the SRC_URI arrow) so that it can be fetched from a mirror. It
> doesn't matter if that disk filename has some ugly percent signs or
> not, so long as we can send an appropriate request to the mirror web
> server.
>
> If you have a counter-example where things will fail with my original
> patch, please elaborate.
Okay, yeah the ability to fix with SRC_URI arrows decouples them enough,
and all ::gentoo ebuilds are currently compliant.
--
Thanks,
Zac


signature.asc (1000 bytes) Download Attachment
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Improve handling of percent-signs in SRC_URI

Ulrich Mueller-2
In reply to this post by Zac Medico-2
>>>>> On Sun, 31 May 2020, Zac Medico wrote:

> We've also got these other basename calls in fetch.py:

>> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
>> index 28e7caf53..56b375d58 100644
>> --- a/lib/portage/package/ebuild/fetch.py
>> +++ b/lib/portage/package/ebuild/fetch.py
>> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>> else:
>> for myuri in myuris:
>> if urlparse(myuri).scheme:
>> -                               file_uri_tuples.append((os.path.basename(myuri), myuri))
>> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
>> else:
>> -                               file_uri_tuples.append((os.path.basename(myuri), None))
>> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))

> The case with no scheme is not a URI, so we need to decide whether
> or not to unquote, and we should make _parse_uri_map behavior
> consistent for this case.

I think that this follows from PMS, section 8.2 [1]:

| The following elements are recognised in at least one class of
| specification. All elements must be surrounded on both sides by
| whitespace, except at the start and end of the string.
|
| [...]
| - A URI, in the form proto://host/path. Permitted in SRC_URI and
| HOMEPAGE. In EAPIs listed in table 8.4 as supporting SRC_URI arrows,
| may optionally be followed by whitespace, then ->, then whitespace,
| then a simple filename when in SRC_URI. For SRC_URI behaviour, see
| section 8.2.10.
| - A flat filename. Permitted in SRC_URI.

So if it isn't an URI then it is a "flat filename" which IMHO is to be
interpreted literally without any unquoting.

Ulrich

[1] https://projects.gentoo.org/pms/7/pms.html#x1-690008.2

signature.asc (517 bytes) Download Attachment
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Improve handling of percent-signs in SRC_URI

Zac Medico-2
On 5/31/20 1:24 PM, Ulrich Mueller wrote:

>>>>>> On Sun, 31 May 2020, Zac Medico wrote:
>
>> We've also got these other basename calls in fetch.py:
>
>>> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
>>> index 28e7caf53..56b375d58 100644
>>> --- a/lib/portage/package/ebuild/fetch.py
>>> +++ b/lib/portage/package/ebuild/fetch.py
>>> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>>> else:
>>> for myuri in myuris:
>>> if urlparse(myuri).scheme:
>>> -                               file_uri_tuples.append((os.path.basename(myuri), myuri))
>>> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
>>> else:
>>> -                               file_uri_tuples.append((os.path.basename(myuri), None))
>>> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))
>
>> The case with no scheme is not a URI, so we need to decide whether
>> or not to unquote, and we should make _parse_uri_map behavior
>> consistent for this case.
>
> I think that this follows from PMS, section 8.2 [1]:
>
> | The following elements are recognised in at least one class of
> | specification. All elements must be surrounded on both sides by
> | whitespace, except at the start and end of the string.
> |
> | [...]
> | - A URI, in the form proto://host/path. Permitted in SRC_URI and
> | HOMEPAGE. In EAPIs listed in table 8.4 as supporting SRC_URI arrows,
> | may optionally be followed by whitespace, then ->, then whitespace,
> | then a simple filename when in SRC_URI. For SRC_URI behaviour, see
> | section 8.2.10.
> | - A flat filename. Permitted in SRC_URI.
>
> So if it isn't an URI then it is a "flat filename" which IMHO is to be
> interpreted literally without any unquoting.
>
> Ulrich
>
> [1] https://projects.gentoo.org/pms/7/pms.html#x1-690008.2
>
This interpretation sounds good to me.
--
Thanks,
Zac


signature.asc (1000 bytes) Download Attachment
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Improve handling of percent-signs in SRC_URI

Mike Gilbert-2
On Sun, May 31, 2020 at 4:32 PM Zac Medico <[hidden email]> wrote:

>
> On 5/31/20 1:24 PM, Ulrich Mueller wrote:
> >>>>>> On Sun, 31 May 2020, Zac Medico wrote:
> >
> >> We've also got these other basename calls in fetch.py:
> >
> >>> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
> >>> index 28e7caf53..56b375d58 100644
> >>> --- a/lib/portage/package/ebuild/fetch.py
> >>> +++ b/lib/portage/package/ebuild/fetch.py
> >>> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> >>> else:
> >>> for myuri in myuris:
> >>> if urlparse(myuri).scheme:
> >>> -                               file_uri_tuples.append((os.path.basename(myuri), myuri))
> >>> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
> >>> else:
> >>> -                               file_uri_tuples.append((os.path.basename(myuri), None))
> >>> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))
> >
> >> The case with no scheme is not a URI, so we need to decide whether
> >> or not to unquote, and we should make _parse_uri_map behavior
> >> consistent for this case.
> >
> > I think that this follows from PMS, section 8.2 [1]:
> >
> > | The following elements are recognised in at least one class of
> > | specification. All elements must be surrounded on both sides by
> > | whitespace, except at the start and end of the string.
> > |
> > | [...]
> > | - A URI, in the form proto://host/path. Permitted in SRC_URI and
> > | HOMEPAGE. In EAPIs listed in table 8.4 as supporting SRC_URI arrows,
> > | may optionally be followed by whitespace, then ->, then whitespace,
> > | then a simple filename when in SRC_URI. For SRC_URI behaviour, see
> > | section 8.2.10.
> > | - A flat filename. Permitted in SRC_URI.
> >
> > So if it isn't an URI then it is a "flat filename" which IMHO is to be
> > interpreted literally without any unquoting.
> >
> > Ulrich
> >
> > [1] https://projects.gentoo.org/pms/7/pms.html#x1-690008.2
> >
>
> This interpretation sounds good to me.

Agreed.

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Improve handling of percent-signs in SRC_URI

Mike Gilbert-2
In reply to this post by Zac Medico-2
On Sun, May 31, 2020 at 2:01 PM Zac Medico <[hidden email]> wrote:

>
> On 5/31/20 6:17 AM, Mike Gilbert wrote:
> > Unquote the URL basename when fetching from upstream.
> > Quote the filename when fetching from mirrors.
> >
> > Bug: https://bugs.gentoo.org/719810
> > Signed-off-by: Mike Gilbert <[hidden email]>
> > ---
> >  lib/portage/dbapi/porttree.py       | 7 ++++++-
> >  lib/portage/package/ebuild/fetch.py | 9 +++++++--
> >  2 files changed, 13 insertions(+), 3 deletions(-)
> >
> > diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
> > index 08af17bcd..984263039 100644
> > --- a/lib/portage/dbapi/porttree.py
> > +++ b/lib/portage/dbapi/porttree.py
> > @@ -55,6 +55,11 @@ try:
> >  except ImportError:
> >       from urlparse import urlparse
> >
> > +try:
> > +     from urllib.parse import unquote as urlunquote
> > +except ImportError:
> > +     from urllib import unquote as urlunquote
> > +
> >  if sys.hexversion >= 0x3000000:
> >       # pylint: disable=W0622
> >       basestring = str
> > @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
> >                       myuris.pop()
> >                       distfile = myuris.pop()
> >               else:
> > -                     distfile = os.path.basename(uri)
> > +                     distfile = urlunquote(os.path.basename(uri))
> >                       if not distfile:
> >                               raise portage.exception.InvalidDependString(
> >                                       ("getFetchMap(): '%s' SRC_URI has no file " + \
> > diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
> > index 28e7caf53..47c3ad28f 100644
> > --- a/lib/portage/package/ebuild/fetch.py
> > +++ b/lib/portage/package/ebuild/fetch.py
> > @@ -26,6 +26,11 @@ try:
> >  except ImportError:
> >       from urlparse import urlparse
> >
> > +try:
> > +     from urllib.parse import quote as urlquote
> > +except ImportError:
> > +     from urllib import quote as urlquote
> > +
> >  import portage
> >  portage.proxy.lazyimport.lazyimport(globals(),
> >       'portage.package.ebuild.config:check_config_instance,config',
> > @@ -351,7 +356,7 @@ _size_suffix_map = {
> >
> >  class FlatLayout(object):
> >       def get_path(self, filename):
> > -             return filename
> > +             return urlquote(filename)
> >
> >       def get_filenames(self, distdir):
> >               for dirpath, dirnames, filenames in os.walk(distdir,
> > @@ -382,7 +387,7 @@ class FilenameHashLayout(object):
> >                       c = c // 4
> >                       ret += fnhash[:c] + '/'
> >                       fnhash = fnhash[c:]
> > -             return ret + filename
> > +             return ret + urlquote(filename)
> >
> >       def get_filenames(self, distdir):
> >               pattern = ''
> >
>
> We've also got these other basename calls in fetch.py:
>
> > diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
> > index 28e7caf53..56b375d58 100644
> > --- a/lib/portage/package/ebuild/fetch.py
> > +++ b/lib/portage/package/ebuild/fetch.py
> > @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
> >         else:
> >                 for myuri in myuris:
> >                         if urlparse(myuri).scheme:
> > -                               file_uri_tuples.append((os.path.basename(myuri), myuri))
> > +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
> >                         else:
> > -                               file_uri_tuples.append((os.path.basename(myuri), None))
> > +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))

I'm not sure how to reach this particular code path. In my testing,
the fetch() function gets passed an OrderedDict in myuris, and the
filenames have already been unquoted, so we don't want to do it again.

Any idea how this "else" block would ever be executed?

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Improve handling of percent-signs in SRC_URI

Zac Medico-2
On 5/31/20 1:53 PM, Mike Gilbert wrote:

> On Sun, May 31, 2020 at 2:01 PM Zac Medico <[hidden email]> wrote:
>>
>> On 5/31/20 6:17 AM, Mike Gilbert wrote:
>>> Unquote the URL basename when fetching from upstream.
>>> Quote the filename when fetching from mirrors.
>>>
>>> Bug: https://bugs.gentoo.org/719810
>>> Signed-off-by: Mike Gilbert <[hidden email]>
>>> ---
>>>  lib/portage/dbapi/porttree.py       | 7 ++++++-
>>>  lib/portage/package/ebuild/fetch.py | 9 +++++++--
>>>  2 files changed, 13 insertions(+), 3 deletions(-)
>>>
>>> diff --git a/lib/portage/dbapi/porttree.py b/lib/portage/dbapi/porttree.py
>>> index 08af17bcd..984263039 100644
>>> --- a/lib/portage/dbapi/porttree.py
>>> +++ b/lib/portage/dbapi/porttree.py
>>> @@ -55,6 +55,11 @@ try:
>>>  except ImportError:
>>>       from urlparse import urlparse
>>>
>>> +try:
>>> +     from urllib.parse import unquote as urlunquote
>>> +except ImportError:
>>> +     from urllib import unquote as urlunquote
>>> +
>>>  if sys.hexversion >= 0x3000000:
>>>       # pylint: disable=W0622
>>>       basestring = str
>>> @@ -1547,7 +1552,7 @@ def _parse_uri_map(cpv, metadata, use=None):
>>>                       myuris.pop()
>>>                       distfile = myuris.pop()
>>>               else:
>>> -                     distfile = os.path.basename(uri)
>>> +                     distfile = urlunquote(os.path.basename(uri))
>>>                       if not distfile:
>>>                               raise portage.exception.InvalidDependString(
>>>                                       ("getFetchMap(): '%s' SRC_URI has no file " + \
>>> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
>>> index 28e7caf53..47c3ad28f 100644
>>> --- a/lib/portage/package/ebuild/fetch.py
>>> +++ b/lib/portage/package/ebuild/fetch.py
>>> @@ -26,6 +26,11 @@ try:
>>>  except ImportError:
>>>       from urlparse import urlparse
>>>
>>> +try:
>>> +     from urllib.parse import quote as urlquote
>>> +except ImportError:
>>> +     from urllib import quote as urlquote
>>> +
>>>  import portage
>>>  portage.proxy.lazyimport.lazyimport(globals(),
>>>       'portage.package.ebuild.config:check_config_instance,config',
>>> @@ -351,7 +356,7 @@ _size_suffix_map = {
>>>
>>>  class FlatLayout(object):
>>>       def get_path(self, filename):
>>> -             return filename
>>> +             return urlquote(filename)
>>>
>>>       def get_filenames(self, distdir):
>>>               for dirpath, dirnames, filenames in os.walk(distdir,
>>> @@ -382,7 +387,7 @@ class FilenameHashLayout(object):
>>>                       c = c // 4
>>>                       ret += fnhash[:c] + '/'
>>>                       fnhash = fnhash[c:]
>>> -             return ret + filename
>>> +             return ret + urlquote(filename)
>>>
>>>       def get_filenames(self, distdir):
>>>               pattern = ''
>>>
>>
>> We've also got these other basename calls in fetch.py:
>>
>>> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
>>> index 28e7caf53..56b375d58 100644
>>> --- a/lib/portage/package/ebuild/fetch.py
>>> +++ b/lib/portage/package/ebuild/fetch.py
>>> @@ -730,9 +730,9 @@ def fetch(myuris, mysettings, listonly=0, fetchonly=0,
>>>         else:
>>>                 for myuri in myuris:
>>>                         if urlparse(myuri).scheme:
>>> -                               file_uri_tuples.append((os.path.basename(myuri), myuri))
>>> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), myuri))
>>>                         else:
>>> -                               file_uri_tuples.append((os.path.basename(myuri), None))
>>> +                               file_uri_tuples.append((urlunquote(os.path.basename(myuri)), None))
>
> I'm not sure how to reach this particular code path. In my testing,
> the fetch() function gets passed an OrderedDict in myuris, and the
> filenames have already been unquoted, so we don't want to do it again.
>
> Any idea how this "else" block would ever be executed?
This code is only for backward compatibility code, so we should probably
add a deprecation warning and forget about quoting/unquoting.
--
Thanks,
Zac


signature.asc (1000 bytes) Download Attachment