[PATCH 1/2] fetch: Use real os.walk() to avoid unicode issues with Portage

classic Classic list List threaded Threaded
6 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH 1/2] fetch: Use real os.walk() to avoid unicode issues with Portage

Michał Górny-5
Use real os.walk() when getting filenames for FlatLayout.  Unlike
the wrapped Portage module, it return str output for str path parameter,
so we don't have to recode it back and forth.

Signed-off-by: Michał Górny <[hidden email]>
---
 lib/portage/package/ebuild/fetch.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
index cedf12b19..be277f1a3 100644
--- a/lib/portage/package/ebuild/fetch.py
+++ b/lib/portage/package/ebuild/fetch.py
@@ -11,6 +11,7 @@ import io
 import itertools
 import json
 import logging
+import os as real_os
 import random
 import re
 import stat
@@ -270,7 +271,7 @@ class FlatLayout(object):
  return filename
 
  def get_filenames(self, distdir):
- for dirpath, dirnames, filenames in os.walk(distdir,
+ for dirpath, dirnames, filenames in real_os.walk(distdir,
  onerror=_raise_exc):
  return iter(filenames)
 
--
2.23.0


Reply | Threaded
Open this post in threaded view
|

[PATCH 2/2] emirrordist: Pass path from DeletionIterator to DeletionTask

Michał Górny-5
Since DeletionIterator needs to stat the distfile and therefore find
one working path for it, pass it to DeletionTask instead of recomputing
it there.  This also fixes wrongly assuming that first layout will
always be correct.

Bug: https://bugs.gentoo.org/697890
Signed-off-by: Michał Górny <[hidden email]>
---
 lib/portage/_emirrordist/DeletionIterator.py |  2 ++
 lib/portage/_emirrordist/DeletionTask.py     | 14 +++++---------
 2 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/lib/portage/_emirrordist/DeletionIterator.py b/lib/portage/_emirrordist/DeletionIterator.py
index 5c193911a..3cbff2c3a 100644
--- a/lib/portage/_emirrordist/DeletionIterator.py
+++ b/lib/portage/_emirrordist/DeletionIterator.py
@@ -72,6 +72,7 @@ class DeletionIterator(object):
 
  yield DeletionTask(background=True,
  distfile=filename,
+ distfile_path=path,
  config=self._config)
 
  else:
@@ -85,6 +86,7 @@ class DeletionIterator(object):
 
  yield DeletionTask(background=True,
  distfile=filename,
+ distfile_path=path,
  config=self._config)
 
  if deletion_db is not None:
diff --git a/lib/portage/_emirrordist/DeletionTask.py b/lib/portage/_emirrordist/DeletionTask.py
index a4bb29419..4e9c26ca2 100644
--- a/lib/portage/_emirrordist/DeletionTask.py
+++ b/lib/portage/_emirrordist/DeletionTask.py
@@ -10,14 +10,9 @@ from _emerge.CompositeTask import CompositeTask
 
 class DeletionTask(CompositeTask):
 
- __slots__ = ('distfile', 'config')
+ __slots__ = ('distfile', 'distfile_path', 'config')
 
  def _start(self):
-
- distfile_path = os.path.join(
- self.config.options.distfiles,
- self.config.layouts[0].get_path(self.distfile))
-
  if self.config.options.recycle_dir is not None:
  recycle_path = os.path.join(
  self.config.options.recycle_dir, self.distfile)
@@ -29,7 +24,8 @@ class DeletionTask(CompositeTask):
  "distfiles to recycle") % self.distfile)
  try:
  # note: distfile_path can be a symlink here
- os.rename(os.path.realpath(distfile_path), recycle_path)
+ os.rename(os.path.realpath(self.distfile_path),
+ recycle_path)
  except OSError as e:
  if e.errno != errno.EXDEV:
  logging.error(("rename %s from distfiles to "
@@ -40,7 +36,7 @@ class DeletionTask(CompositeTask):
  return
 
  self._start_task(
- FileCopier(src_path=distfile_path,
+ FileCopier(src_path=self.distfile_path,
  dest_path=recycle_path,
  background=False),
  self._recycle_copier_exit)
@@ -55,7 +51,7 @@ class DeletionTask(CompositeTask):
  logging.debug(("delete '%s' from "
  "distfiles") % self.distfile)
  try:
- os.unlink(distfile_path)
+ os.unlink(self.distfile_path)
  except OSError as e:
  if e.errno not in (errno.ENOENT, errno.ESTALE):
  logging.error("%s unlink failed in distfiles: %s" %
--
2.23.0


Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/2] fetch: Use real os.walk() to avoid unicode issues with Portage

Zac Medico-2
In reply to this post by Michał Górny-5
On 10/21/19 1:43 AM, Michał Górny wrote:

> Use real os.walk() when getting filenames for FlatLayout.  Unlike
> the wrapped Portage module, it return str output for str path parameter,
> so we don't have to recode it back and forth.
>
> Signed-off-by: Michał Górny <[hidden email]>
> ---
>  lib/portage/package/ebuild/fetch.py | 3 ++-
>  1 file changed, 2 insertions(+), 1 deletion(-)
>
> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
> index cedf12b19..be277f1a3 100644
> --- a/lib/portage/package/ebuild/fetch.py
> +++ b/lib/portage/package/ebuild/fetch.py
> @@ -11,6 +11,7 @@ import io
>  import itertools
>  import json
>  import logging
> +import os as real_os
>  import random
>  import re
>  import stat
> @@ -270,7 +271,7 @@ class FlatLayout(object):
>   return filename
>  
>   def get_filenames(self, distdir):
> - for dirpath, dirnames, filenames in os.walk(distdir,
> + for dirpath, dirnames, filenames in real_os.walk(distdir,
>   onerror=_raise_exc):
>   return iter(filenames)
>  
>
The real_os.walk will trigger UnicodeEncodeError if distdir can't be
encoded with sys.getfilesystemencoding(). It's an edge case, but
generally I prefer to handle it.

We can continue to use portage.os for the os.walk call, and turn
get_filenames into a generator method like this:

    for filename in filenames:
        try:
            yield portage._unicode_decode(filename, errors='strict')
        except UnicodeDecodeError:
            # Ignore it. Distfiles names must have valid UTF8 encoding.
            pass
--
Thanks,
Zac


signature.asc (1000 bytes) Download Attachment
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/2] fetch: Use real os.walk() to avoid unicode issues with Portage

Michał Górny-5
On Mon, 2019-10-21 at 02:10 -0700, Zac Medico wrote:

> On 10/21/19 1:43 AM, Michał Górny wrote:
> > Use real os.walk() when getting filenames for FlatLayout.  Unlike
> > the wrapped Portage module, it return str output for str path parameter,
> > so we don't have to recode it back and forth.
> >
> > Signed-off-by: Michał Górny <[hidden email]>
> > ---
> >  lib/portage/package/ebuild/fetch.py | 3 ++-
> >  1 file changed, 2 insertions(+), 1 deletion(-)
> >
> > diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
> > index cedf12b19..be277f1a3 100644
> > --- a/lib/portage/package/ebuild/fetch.py
> > +++ b/lib/portage/package/ebuild/fetch.py
> > @@ -11,6 +11,7 @@ import io
> >  import itertools
> >  import json
> >  import logging
> > +import os as real_os
> >  import random
> >  import re
> >  import stat
> > @@ -270,7 +271,7 @@ class FlatLayout(object):
> >   return filename
> >  
> >   def get_filenames(self, distdir):
> > - for dirpath, dirnames, filenames in os.walk(distdir,
> > + for dirpath, dirnames, filenames in real_os.walk(distdir,
> >   onerror=_raise_exc):
> >   return iter(filenames)
> >  
> >
>
> The real_os.walk will trigger UnicodeEncodeError if distdir can't be
> encoded with sys.getfilesystemencoding(). It's an edge case, but
> generally I prefer to handle it.
>
> We can continue to use portage.os for the os.walk call, and turn
> get_filenames into a generator method like this:
>
>     for filename in filenames:
>         try:
>             yield portage._unicode_decode(filename, errors='strict')
>         except UnicodeDecodeError:
>             # Ignore it. Distfiles names must have valid UTF8 encoding.
>             pass
Since you've already written it, could you commit it?  I don't wish to
have my name on the implicit module overrides hackery I don't approve
of.

--
Best regards,
Michał Górny


signature.asc (631 bytes) Download Attachment
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 1/2] fetch: Use real os.walk() to avoid unicode issues with Portage

Zac Medico-2
On 10/21/19 2:16 AM, Michał Górny wrote:

> On Mon, 2019-10-21 at 02:10 -0700, Zac Medico wrote:
>> On 10/21/19 1:43 AM, Michał Górny wrote:
>>> Use real os.walk() when getting filenames for FlatLayout.  Unlike
>>> the wrapped Portage module, it return str output for str path parameter,
>>> so we don't have to recode it back and forth.
>>>
>>> Signed-off-by: Michał Górny <[hidden email]>
>>> ---
>>>  lib/portage/package/ebuild/fetch.py | 3 ++-
>>>  1 file changed, 2 insertions(+), 1 deletion(-)
>>>
>>> diff --git a/lib/portage/package/ebuild/fetch.py b/lib/portage/package/ebuild/fetch.py
>>> index cedf12b19..be277f1a3 100644
>>> --- a/lib/portage/package/ebuild/fetch.py
>>> +++ b/lib/portage/package/ebuild/fetch.py
>>> @@ -11,6 +11,7 @@ import io
>>>  import itertools
>>>  import json
>>>  import logging
>>> +import os as real_os
>>>  import random
>>>  import re
>>>  import stat
>>> @@ -270,7 +271,7 @@ class FlatLayout(object):
>>>   return filename
>>>  
>>>   def get_filenames(self, distdir):
>>> - for dirpath, dirnames, filenames in os.walk(distdir,
>>> + for dirpath, dirnames, filenames in real_os.walk(distdir,
>>>   onerror=_raise_exc):
>>>   return iter(filenames)
>>>  
>>>
>>
>> The real_os.walk will trigger UnicodeEncodeError if distdir can't be
>> encoded with sys.getfilesystemencoding(). It's an edge case, but
>> generally I prefer to handle it.
>>
>> We can continue to use portage.os for the os.walk call, and turn
>> get_filenames into a generator method like this:
>>
>>     for filename in filenames:
>>         try:
>>             yield portage._unicode_decode(filename, errors='strict')
>>         except UnicodeDecodeError:
>>             # Ignore it. Distfiles names must have valid UTF8 encoding.
>>             pass
>
> Since you've already written it, could you commit it?  I don't wish to
> have my name on the implicit module overrides hackery I don't approve
> of.
Done:

https://gitweb.gentoo.org/proj/portage.git/commit/?id=d9855418352398013ae787bb73f70e935ec109ca

I don't really like the portage.os unicode wrapper either, but I'm not
aware of a good alternative to solve the pervasive UnicodeEncodeError
issue that I've mentioned.
--
Thanks,
Zac


signature.asc (1000 bytes) Download Attachment
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH 2/2] emirrordist: Pass path from DeletionIterator to DeletionTask

Zac Medico-2
In reply to this post by Michał Górny-5
On 10/21/19 1:43 AM, Michał Górny wrote:

> Since DeletionIterator needs to stat the distfile and therefore find
> one working path for it, pass it to DeletionTask instead of recomputing
> it there.  This also fixes wrongly assuming that first layout will
> always be correct.
>
> Bug: https://bugs.gentoo.org/697890
> Signed-off-by: Michał Górny <[hidden email]>
> ---
>  lib/portage/_emirrordist/DeletionIterator.py |  2 ++
>  lib/portage/_emirrordist/DeletionTask.py     | 14 +++++---------
>  2 files changed, 7 insertions(+), 9 deletions(-)
>
> diff --git a/lib/portage/_emirrordist/DeletionIterator.py b/lib/portage/_emirrordist/DeletionIterator.py
> index 5c193911a..3cbff2c3a 100644
> --- a/lib/portage/_emirrordist/DeletionIterator.py
> +++ b/lib/portage/_emirrordist/DeletionIterator.py
> @@ -72,6 +72,7 @@ class DeletionIterator(object):
>  
>   yield DeletionTask(background=True,
>   distfile=filename,
> + distfile_path=path,
>   config=self._config)
>  
>   else:
> @@ -85,6 +86,7 @@ class DeletionIterator(object):
>  
>   yield DeletionTask(background=True,
>   distfile=filename,
> + distfile_path=path,
>   config=self._config)
>  
>   if deletion_db is not None:
> diff --git a/lib/portage/_emirrordist/DeletionTask.py b/lib/portage/_emirrordist/DeletionTask.py
> index a4bb29419..4e9c26ca2 100644
> --- a/lib/portage/_emirrordist/DeletionTask.py
> +++ b/lib/portage/_emirrordist/DeletionTask.py
> @@ -10,14 +10,9 @@ from _emerge.CompositeTask import CompositeTask
>  
>  class DeletionTask(CompositeTask):
>  
> - __slots__ = ('distfile', 'config')
> + __slots__ = ('distfile', 'distfile_path', 'config')
>  
>   def _start(self):
> -
> - distfile_path = os.path.join(
> - self.config.options.distfiles,
> - self.config.layouts[0].get_path(self.distfile))
> -
>   if self.config.options.recycle_dir is not None:
>   recycle_path = os.path.join(
>   self.config.options.recycle_dir, self.distfile)
> @@ -29,7 +24,8 @@ class DeletionTask(CompositeTask):
>   "distfiles to recycle") % self.distfile)
>   try:
>   # note: distfile_path can be a symlink here
> - os.rename(os.path.realpath(distfile_path), recycle_path)
> + os.rename(os.path.realpath(self.distfile_path),
> + recycle_path)
>   except OSError as e:
>   if e.errno != errno.EXDEV:
>   logging.error(("rename %s from distfiles to "
> @@ -40,7 +36,7 @@ class DeletionTask(CompositeTask):
>   return
>  
>   self._start_task(
> - FileCopier(src_path=distfile_path,
> + FileCopier(src_path=self.distfile_path,
>   dest_path=recycle_path,
>   background=False),
>   self._recycle_copier_exit)
> @@ -55,7 +51,7 @@ class DeletionTask(CompositeTask):
>   logging.debug(("delete '%s' from "
>   "distfiles") % self.distfile)
>   try:
> - os.unlink(distfile_path)
> + os.unlink(self.distfile_path)
>   except OSError as e:
>   if e.errno not in (errno.ENOENT, errno.ESTALE):
>   logging.error("%s unlink failed in distfiles: %s" %
>
Looks good. Please merge.
--
Thanks,
Zac


signature.asc (1000 bytes) Download Attachment