[PATCH] Rewrite doins in python (bug 624526)

classic Classic list List threaded Threaded
5 messages Options
Reply | Threaded
Open this post in threaded view
|

[PATCH] Rewrite doins in python (bug 624526)

Zac Medico-2
From: Hidehiko Abe <[hidden email]>

doins is written in bash. However, specifically in case that
too many files are installed, it is very slow.
This CL rewrites the script in python for performance.

BUG=chromium:712659
TEST=time (./setup_board --forace && \
     ./build_package --withdev && \
     ./build_image --noenable_rootfs_verification test)
===Before===
real    21m35.445s
user    93m40.588s
sys     21m31.224s

===After===
real    17m30.106s
user    94m1.812s
sys     20m13.468s

Change-Id: Ib10f623961ba316753d58397cff5e72fbc343339
Reviewed-on: https://chromium-review.googlesource.com/559225
X-Chromium-Bug: 712659
X-Chromium-Bug-url: https://bugs.chromium.org/p/chromium/issues/detail?id=712659
X-Gentoo-Bug: 624526
X-Gentoo-Bug-url: https://bugs.gentoo.org/624526
---
 bin/doins.py             | 341 +++++++++++++++++++++++++++++++++++++++++++++++
 bin/ebuild-helpers/doins | 123 ++---------------
 2 files changed, 353 insertions(+), 111 deletions(-)
 create mode 100644 bin/doins.py

diff --git a/bin/doins.py b/bin/doins.py
new file mode 100644
index 000000000..4a17287ca
--- /dev/null
+++ b/bin/doins.py
@@ -0,0 +1,341 @@
+#!/usr/bin/python -b
+# Copyright 2017 The Chromium OS Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+from __future__ import print_function
+
+import argparse
+import errno
+import grp
+import logging
+import os
+import pwd
+import re
+import shlex
+import shutil
+import stat
+import subprocess
+import sys
+
+from portage.util import movefile
+
+# Change back to original cwd _after_ all imports (bug #469338).
+# See also bin/ebuild-helpers/doins, which sets the cwd.
+os.chdir(os.environ["__PORTAGE_HELPER_CWD"])
+
+_PORTAGE_ACTUAL_DISTDIR = (
+ (os.environb if sys.version_info.major >= 3 else os.environ).get(
+ b'PORTAGE_ACTUAL_DISTDIR', b'') + b'/')
+
+
+def _parse_install_options(
+ options, inprocess_runner_class, subprocess_runner_class):
+ """Parses command line arguments for install command."""
+ parser = argparse.ArgumentParser()
+ parser.add_argument('-g', '--group', default=-1, type=_parse_group)
+ parser.add_argument('-o', '--owner', default=-1, type=_parse_user)
+ # "install"'s --mode option is complicated. So here is partially
+ # supported.
+ parser.add_argument('-m', '--mode', type=_parse_mode)
+ split_options = shlex.split(options)
+ namespace, remaining = parser.parse_known_args(split_options)
+ if remaining:
+ print('Unknown install options: %s, %r' % (options, remaining),
+ file=sys.stderr)
+ if os.environ.get('DOINSSTRICTOPTION', '') == '1':
+ sys.exit(1)
+ print('Continue with falling back to \'install\' '
+ 'command execution, which can be slower.',
+ file=sys.stderr)
+ return subprocess_runner_class(split_options)
+ return inprocess_runner_class(namespace)
+
+
+def _parse_group(group):
+ """Parses gid."""
+ g = grp.getgrnam(group)
+ if g:
+ return g.gr_gid
+ return int(group)
+
+
+def _parse_user(user):
+ """Parses uid."""
+ u = pwd.getpwnam(user)
+ if u:
+ return u.pw_uid
+ return int(user)
+
+
+def _parse_mode(mode):
+ # "install"'s --mode option is complicated. So here is partially
+ # supported.
+ # In Python 3, the prefix of octal int must be '0o' rather than '0'.
+ # So, set base explicitly in that case.
+ return int(mode, 8 if re.search(r'^0[0-7]*$', mode) else 0)
+
+
+def _set_attributes(options, path):
+ """Sets attributes the file/dir at given |path|.
+
+ Args:
+ options: object which has |owner|, |group| and |mode| fields.
+ |owner| is int value representing uid. Similary |group|
+ represents gid.
+ If -1 is set, just unchanged.
+ |mode| is the bits of permissions.
+ path: File/directory path.
+ """
+ if options.owner != -1 or options.group != -1:
+ os.lchown(path, options.owner, options.group)
+ if options.mode is not None:
+ os.chmod(path, options.mode)
+
+
+class _InsInProcessInstallRunner(object):
+ def __init__(self, parsed_options):
+ self._parsed_options = parsed_options
+ self._copy_xattr = (
+ 'xattr' in os.environ.get('FEATURES', '').split())
+ if self._copy_xattr:
+ self._xattr_exclude = os.environ.get(
+ 'PORTAGE_XATTR_EXCLUDE',
+ 'security.* system.nfs4_acl')
+
+ def run(self, source, dest_dir):
+ """Installs a file at |source| into |dest_dir| in process."""
+ dest = os.path.join(dest_dir, os.path.basename(source))
+ try:
+ # TODO: Consider to use portage.util.file_copy.copyfile
+ # introduced by
+ # https://gitweb.gentoo.org/proj/portage.git/commit/
+ #   ?id=8ab5c8835931fd9ec098dbf4c5f416eb32e4a3a4
+ # after uprev.
+ shutil.copyfile(source, dest)
+ _set_attributes(self._parsed_options, dest)
+ if self._copy_xattr:
+ movefile._copyxattr(
+ source, dest,
+ exclude=self._xattr_exclude)
+ except Exception:
+ logging.exception(
+ 'Failed to copy file: '
+ '_parsed_options=%r, source=%r, dest_dir=%r',
+ self._parsed_options, source, dest_dir)
+ return False
+ return True
+
+
+class _InsSubprocessInstallRunner(object):
+ def __init__(self, split_options):
+ self._split_options = split_options
+
+ def run(self, source, dest_dir):
+ """Installs a file at |source| into |dest_dir| by 'install'."""
+ command = ['install'] + self._split_options + [source, dest_dir]
+ return subprocess.call(command) == 0
+
+
+class _DirInProcessInstallRunner(object):
+ def __init__(self, parsed_options):
+ self._parsed_options = parsed_options
+
+ def run(self, dest):
+ """Installs a dir into |dest| in process."""
+ try:
+ os.makedirs(dest)
+ except OSError as e:
+ if e.errno != errno.EEXIST or not os.path.isdir(dest):
+ raise
+ _set_attributes(self._parsed_options, dest)
+
+
+class _DirSubprocessInstallRunner(object):
+ """Runs real 'install' command to install files or directories."""
+
+ def __init__(self, split_options):
+ self._split_options = split_options
+
+ def run(self, dest):
+ """Installs a dir into |dest| by 'install' command."""
+ command = ['install', '-d'] + self._split_options + [dest]
+ subprocess.call(command)
+
+
+class _InstallRunner(object):
+ def __init__(self):
+ self._ins_runner = _parse_install_options(
+ os.environ.get('INSOPTIONS', ''),
+ _InsInProcessInstallRunner,
+ _InsSubprocessInstallRunner)
+ self._dir_runner = _parse_install_options(
+ os.environ.get('DIROPTIONS', ''),
+ _DirInProcessInstallRunner,
+ _DirSubprocessInstallRunner)
+
+ def install_file(self, source, dest_dir):
+ """Installs a file at |source| into |dest_dir| directory."""
+ return self._ins_runner.run(source, dest_dir)
+
+ def install_dir(self, dest):
+ """Creates a directory at |dest|."""
+ self._dir_runner.run(dest)
+
+
+def _doins(args, install_runner, relpath, source_root):
+ """Installs a file as if 'install' command runs.
+
+ Installs a file at |source_root|/|relpath| into |args.dest|/|relpath|.
+ If |args.preserve_symlinks| is set, creates symlink if the source is a
+ symlink.
+
+ Args:
+ args: parsed arguments. It should have following fields.
+ - preserve_symlinks: bool representing whether symlinks
+ needs to be preserved.
+ - dest: Destination root directory.
+ - insoptions: options for 'install' command.
+ intall_runner: _InstallRunner instance for file install.
+ relpath: Relative path of the file being installed.
+ source_root: Source root directory.
+
+ Returns: True on success.
+ """
+ source = os.path.join(source_root, relpath)
+ dest = os.path.join(args.dest, relpath)
+ if os.path.islink(source):
+ # Our fake $DISTDIR contains symlinks that should not be
+ # reproduced inside $D. In order to ensure that things like
+ # dodoc "$DISTDIR"/foo.pdf work as expected, we dereference
+ # symlinked files that refer to absolute paths inside
+ # $PORTAGE_ACTUAL_DISTDIR/.
+ try:
+ if (args.preserve_symlinks and
+ not os.readlink(source).startswith(
+ _PORTAGE_ACTUAL_DISTDIR)):
+ linkto = os.readlink(source)
+ shutil.rmtree(dest, ignore_errors=True)
+ os.symlink(linkto, dest)
+ return True
+ except Exception:
+ logging.exception(
+ 'Failed to create symlink: '
+ 'args=%r, relpath=%r, source_root=%r',
+ args, relpath, source_root)
+ return False
+
+ return install_runner.install_file(source, os.path.dirname(dest))
+
+
+def _parse_args():
+ """Parses the command line arguments."""
+ parser = argparse.ArgumentParser()
+ parser.add_argument(
+ '--recursive', action='store_true',
+ help='If set, installs files recursively. Otherwise, '
+ 'just skips directories.')
+ parser.add_argument(
+ '--preserve_symlinks', action='store_true',
+ help='If set, a symlink will be installed as symlink.')
+ # If helper is dodoc, it changes the behavior for the directory
+ # install without --recursive.
+ parser.add_argument('--helper', help='Name of helper.')
+ parser.add_argument(
+ '--dest',
+ help='Destination where the files are installed.')
+ parser.add_argument(
+ 'sources', nargs='*',
+ help='Source file/directory paths to be installed.')
+
+ args = parser.parse_args()
+
+ # Encode back to the original byte stream. Please see
+ # http://bugs.python.org/issue8776.
+ if sys.version_info.major >= 3:
+ args.dest = os.fsencode(args.dest)
+ args.sources = [os.fsencode(source) for source in args.sources]
+
+ return args
+
+
+def _install_dir(args, install_runner, source):
+ """Installs directory at |source|.
+
+ Returns:
+ True on success, False on failure, or None on skipped.
+ """
+ if not args.recursive:
+ if args.helper == 'dodoc':
+ print('!!! %s: %s is a directory' % (
+ args.helper, source),
+ file=sys.stderr)
+ return False
+ # Neither success nor fail. Return None to indicate skipped.
+ return None
+
+ # Strip trailing '/'s.
+ source = source.rstrip(b'/')
+ source_root = os.path.dirname(source)
+ dest_dir = os.path.join(args.dest, os.path.basename(source))
+ install_runner.install_dir(dest_dir)
+
+ relpath_list = []
+ for dirpath, dirnames, filenames in os.walk(source):
+ for dirname in dirnames:
+ relpath = os.path.relpath(
+ os.path.join(dirpath, dirname),
+ source_root)
+ dest = os.path.join(args.dest, relpath)
+ install_runner.install_dir(dest)
+ relpath_list.extend(
+ os.path.relpath(
+ os.path.join(dirpath, filename), source_root)
+ for filename in filenames)
+
+ if not relpath_list:
+ # NOTE: Even if only an empty directory is installed here, it
+ # still counts as success, since an empty directory given as
+ # an argument to doins -r should not trigger failure.
+ return True
+ success = True
+ for relpath in relpath_list:
+ if not _doins(args, install_runner, relpath, source_root):
+ success = False
+ return success
+
+
+def main():
+ args = _parse_args()
+ install_runner = _InstallRunner()
+
+ if not os.path.isdir(args.dest):
+ install_runner.install_dir(args.dest)
+
+ any_success = False
+ any_failure = False
+ for source in args.sources:
+ if (os.path.isdir(source) and
+ (not args.preserve_symlinks or
+ not os.path.islink(source))):
+ ret = _install_dir(args, install_runner, source)
+ if ret is None:
+ continue
+ if ret:
+ any_success = True
+ else:
+ any_failure = True
+ else:
+ if _doins(
+ args, install_runner,
+ os.path.basename(source),
+ os.path.dirname(source)):
+ any_success = True
+ else:
+ any_failure = True
+
+ return 0 if not any_failure and any_success else 1
+
+
+if __name__ == '__main__':
+ sys.exit(main())
diff --git a/bin/ebuild-helpers/doins b/bin/ebuild-helpers/doins
index 93052c2ea..de559780c 100755
--- a/bin/ebuild-helpers/doins
+++ b/bin/ebuild-helpers/doins
@@ -24,10 +24,10 @@ if [ $# -lt 1 ] ; then
 fi
 
 if [[ "$1" == "-r" ]] ; then
- DOINSRECUR=y
+ RECURSIVE_OPTION='--recursive'
  shift
 else
- DOINSRECUR=n
+ RECURSIVE_OPTION=''
 fi
 
 if ! ___eapi_has_prefix_variables; then
@@ -44,116 +44,17 @@ if [[ ${INSDESTTREE#${ED}} != "${INSDESTTREE}" ]]; then
 fi
 
 if ___eapi_doins_and_newins_preserve_symlinks; then
- PRESERVE_SYMLINKS=y
+ SYMLINK_OPTION='--preserve_symlinks'
 else
- PRESERVE_SYMLINKS=n
+ SYMLINK_OPTION=''
 fi
 
-export TMP=$(mktemp -d "${T}/.doins_tmp_XXXXXX")
-# Use separate directories to avoid potential name collisions.
-mkdir -p "$TMP"/{1,2}
+# Use safe cwd, avoiding unsafe import for bug #469338.
+export __PORTAGE_HELPER_CWD=${PWD}
+cd "${PORTAGE_PYM_PATH:-/usr/lib/portage/pym}"
 
-[[ ! -d ${ED}${INSDESTTREE} ]] && dodir "${INSDESTTREE}"
-
-_doins() {
- local mysrc="$1" mydir="$2" cleanup="" rval
-
- if [ -L "$mysrc" ] ; then
- # Our fake $DISTDIR contains symlinks that should
- # not be reproduced inside $D. In order to ensure
- # that things like dodoc "$DISTDIR"/foo.pdf work
- # as expected, we dereference symlinked files that
- # refer to absolute paths inside
- # $PORTAGE_ACTUAL_DISTDIR/.
- if [ $PRESERVE_SYMLINKS = y ] && \
- ! [[ $(readlink "$mysrc") == "$PORTAGE_ACTUAL_DISTDIR"/* ]] ; then
- rm -rf "${ED}$INSDESTTREE/$mydir/${mysrc##*/}" || return $?
- cp -P "$mysrc" "${ED}$INSDESTTREE/$mydir/${mysrc##*/}"
- return $?
- else
- cp "$mysrc" "$TMP/2/${mysrc##*/}" || return $?
- mysrc="$TMP/2/${mysrc##*/}"
- cleanup=$mysrc
- fi
- fi
-
- install ${INSOPTIONS} "${mysrc}" "${ED}${INSDESTTREE}/${mydir}"
- rval=$?
- [[ -n ${cleanup} ]] && rm -f "${cleanup}"
- [ $rval -ne 0 ] && echo "!!! ${helper}: $mysrc does not exist" 1>&2
- return $rval
-}
-
-_xdoins() {
- local -i failed=0
- while read -r -d $'\0' x ; do
- _doins "$x" "${x%/*}"
- ((failed|=$?))
- done
- return $failed
-}
-
-success=0
-failed=0
-
-for x in "$@" ; do
- if [[ $PRESERVE_SYMLINKS = n && -d $x ]] || \
- [[ $PRESERVE_SYMLINKS = y && -d $x && ! -L $x ]] ; then
- if [ "${DOINSRECUR}" == "n" ] ; then
- if [[ ${helper} == dodoc ]] ; then
- echo "!!! ${helper}: $x is a directory" 1>&2
- ((failed|=1))
- fi
- continue
- fi
-
- while [ "$x" != "${x%/}" ] ; do
- x=${x%/}
- done
- if [ "$x" = "${x%/*}" ] ; then
- pushd "$PWD" >/dev/null
- else
- pushd "${x%/*}" >/dev/null
- fi
- x=${x##*/}
- x_orig=$x
- # Follow any symlinks recursively until we've got
- # a normal directory for 'find' to traverse. The
- # name of the symlink will be used for the name
- # of the installed directory, as discussed in
- # bug #239529.
- while [ -L "$x" ] ; do
- pushd "$(readlink "$x")" >/dev/null
- x=${PWD##*/}
- pushd "${PWD%/*}" >/dev/null
- done
- if [[ $x != $x_orig ]] ; then
- mv "$x" "$TMP/1/$x_orig"
- pushd "$TMP/1" >/dev/null
- fi
- find "$x_orig" -type d -exec dodir "${INSDESTTREE}/{}" \;
- find "$x_orig" \( -type f -or -type l \) -print0 | _xdoins
- if [[ ${PIPESTATUS[1]} -eq 0 ]] ; then
- # NOTE: Even if only an empty directory is installed here, it
- # still counts as success, since an empty directory given as
- # an argument to doins -r should not trigger failure.
- ((success|=1))
- else
- ((failed|=1))
- fi
- if [[ $x != $x_orig ]] ; then
- popd >/dev/null
- mv "$TMP/1/$x_orig" "$x"
- fi
- while popd >/dev/null 2>&1 ; do true ; done
- else
- _doins "${x}"
- if [[ $? -eq 0 ]] ; then
- ((success|=1))
- else
- ((failed|=1))
- fi
- fi
-done
-rm -rf "$TMP"
-[[ $failed -ne 0 || $success -eq 0 ]] && { __helpers_die "${helper} failed"; exit 1; } || exit 0
+"${PORTAGE_PYTHON:-/usr/bin/python}" \
+ "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/doins.py \
+ ${RECURSIVE_OPTION} ${SYMLINK_OPTION} \
+ --helper "${helper}" --dest "${ED}${INSDESTTREE}" "$@" || \
+{ __helpers_die "${helper} failed"; exit 1; }
--
2.13.0


Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Rewrite doins in python (bug 624526)

M. J. Everitt
On 14/08/17 08:39, Zac Medico wrote:

> From: Hidehiko Abe <[hidden email]>
>
> doins is written in bash. However, specifically in case that
> too many files are installed, it is very slow.
> This CL rewrites the script in python for performance.
>
> BUG=chromium:712659
> TEST=time (./setup_board --forace && \
>      ./build_package --withdev && \
>      ./build_image --noenable_rootfs_verification test)
> ===Before===
> real    21m35.445s
> user    93m40.588s
> sys     21m31.224s
>
> ===After===
> real    17m30.106s
> user    94m1.812s
> sys     20m13.468s
>
I know I'm gonna get chewed out on this one, but here goes anyway ...

Surely for a package like chromium, who's build time is already in the
'hours' range anyway, surely a couple of minutes gain for the install
phase is neither here nor there?! If there were some genuine filesystem
iop gains/etc for this change, I think I'd likely support it further ..

On this basis, what do the performance differences look like on an
'average' package .. and are there any regressions in this regard?!

I take issue with the copyright assignment, as I believe the legal ..
err .. IANAL devs are campaigning for full rights to be owned and
enforced by Gentoo Inc LLC in the US .. even if they have no idea what
that means or does .. :]

MJE


signature.asc (836 bytes) Download Attachment
Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Rewrite doins in python (bug 624526)

Zac Medico-2
On 08/14/2017 12:49 PM, M. J. Everitt wrote:

> On 14/08/17 08:39, Zac Medico wrote:
>> From: Hidehiko Abe <[hidden email]>
>>
>> doins is written in bash. However, specifically in case that
>> too many files are installed, it is very slow.
>> This CL rewrites the script in python for performance.
>>
>> BUG=chromium:712659
>> TEST=time (./setup_board --forace && \
>>      ./build_package --withdev && \
>>      ./build_image --noenable_rootfs_verification test)
>> ===Before===
>> real    21m35.445s
>> user    93m40.588s
>> sys     21m31.224s
>>
>> ===After===
>> real    17m30.106s
>> user    94m1.812s
>> sys     20m13.468s
>>
> I know I'm gonna get chewed out on this one, but here goes anyway ...
>
> Surely for a package like chromium, who's build time is already in the
> 'hours' range anyway, surely a couple of minutes gain for the install
> phase is neither here nor there?! If there were some genuine filesystem
> iop gains/etc for this change, I think I'd likely support it further ..

It's going to reduce time, power consumption, and heat generation for
all portage users. Also, we can use portage.util.file_copy to optimize
it further with zero-copy, reflink, and sparse file support.

> On this basis, what do the performance differences look like on an
> 'average' package

Well, it's very inefficient to fork/exec the install command for many
files as the existing bash implementation does. The performance
difference is related to the number of files.

> .. and are there any regressions in this regard?!

It's supposed to fallback to calling the install command if there are
any unrecognized options, so the intention is for 100% compatibility.

> I take issue with the copyright assignment, as I believe the legal ..
> err .. IANAL devs are campaigning for full rights to be owned and
> enforced by Gentoo Inc LLC in the US .. even if they have no idea what
> that means or does .. :]

Since it's a BSD-style license, we can copy the code into our project as
long as we retain the copyright notice.
--
Thanks,
Zac

Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Rewrite doins in python (bug 624526)

Brian Dolbec-3
On Mon, 14 Aug 2017 13:31:53 -0700
Zac Medico <[hidden email]> wrote:

> On 08/14/2017 12:49 PM, M. J. Everitt wrote:
> > On 14/08/17 08:39, Zac Medico wrote:  
> >> From: Hidehiko Abe <[hidden email]>
> >>
> >> doins is written in bash. However, specifically in case that
> >> too many files are installed, it is very slow.
> >> This CL rewrites the script in python for performance.
> >>
> >> BUG=chromium:712659
> >> TEST=time (./setup_board --forace && \
> >>      ./build_package --withdev && \
> >>      ./build_image --noenable_rootfs_verification test)
> >> ===Before===
> >> real    21m35.445s
> >> user    93m40.588s
> >> sys     21m31.224s
> >>
> >> ===After===
> >> real    17m30.106s
> >> user    94m1.812s
> >> sys     20m13.468s
> >>  
> > I know I'm gonna get chewed out on this one, but here goes
> > anyway ...
> >
> > Surely for a package like chromium, who's build time is already in
> > the 'hours' range anyway, surely a couple of minutes gain for the
> > install phase is neither here nor there?! If there were some
> > genuine filesystem iop gains/etc for this change, I think I'd
> > likely support it further ..  
>
> It's going to reduce time, power consumption, and heat generation for
> all portage users. Also, we can use portage.util.file_copy to optimize
> it further with zero-copy, reflink, and sparse file support.
>
> > On this basis, what do the performance differences look like on an
> > 'average' package  
>
> Well, it's very inefficient to fork/exec the install command for many
> files as the existing bash implementation does. The performance
> difference is related to the number of files.
>
> > .. and are there any regressions in this regard?!  
>
> It's supposed to fallback to calling the install command if there are
> any unrecognized options, so the intention is for 100% compatibility.
>
> > I take issue with the copyright assignment, as I believe the
> > legal .. err .. IANAL devs are campaigning for full rights to be
> > owned and enforced by Gentoo Inc LLC in the US .. even if they have
> > no idea what that means or does .. :]  
>
> Since it's a BSD-style license, we can copy the code into our project
> as long as we retain the copyright notice.

Patches look fine to me, glad for the speedup :)

--
Brian Dolbec <dolsen>


Reply | Threaded
Open this post in threaded view
|

Re: [PATCH] Rewrite doins in python (bug 624526)

Michał Górny-5
In reply to this post by Zac Medico-2
W dniu pon, 14.08.2017 o godzinie 00∶39 -0700, użytkownik Zac Medico
napisał:

> From: Hidehiko Abe <[hidden email]>
>
> doins is written in bash. However, specifically in case that
> too many files are installed, it is very slow.
> This CL rewrites the script in python for performance.
>
> BUG=chromium:712659
> TEST=time (./setup_board --forace && \
>      ./build_package --withdev && \
>      ./build_image --noenable_rootfs_verification test)
> ===Before===
> real    21m35.445s
> user    93m40.588s
> sys     21m31.224s
>
> ===After===
> real    17m30.106s
> user    94m1.812s
> sys     20m13.468s
>
> Change-Id: Ib10f623961ba316753d58397cff5e72fbc343339
> Reviewed-on: https://chromium-review.googlesource.com/559225
> X-Chromium-Bug: 712659
> X-Chromium-Bug-url: https://bugs.chromium.org/p/chromium/issues/detail?id=712659
> X-Gentoo-Bug: 624526
> X-Gentoo-Bug-url: https://bugs.gentoo.org/624526
> ---
>  bin/doins.py             | 341 +++++++++++++++++++++++++++++++++++++++++++++++
>  bin/ebuild-helpers/doins | 123 ++---------------
>  2 files changed, 353 insertions(+), 111 deletions(-)
>  create mode 100644 bin/doins.py
>
> diff --git a/bin/doins.py b/bin/doins.py
> new file mode 100644
> index 000000000..4a17287ca
> --- /dev/null
> +++ b/bin/doins.py
> @@ -0,0 +1,341 @@
> +#!/usr/bin/python -b
> +# Copyright 2017 The Chromium OS Authors. All rights reserved.
> +# Use of this source code is governed by a BSD-style license that can be
> +# found in the LICENSE file.
> +
> +from __future__ import print_function
> +
> +import argparse
> +import errno
> +import grp
> +import logging
> +import os
> +import pwd
> +import re
> +import shlex
> +import shutil
> +import stat
> +import subprocess
> +import sys
> +
> +from portage.util import movefile
> +
> +# Change back to original cwd _after_ all imports (bug #469338).
> +# See also bin/ebuild-helpers/doins, which sets the cwd.
> +os.chdir(os.environ["__PORTAGE_HELPER_CWD"])
> +
> +_PORTAGE_ACTUAL_DISTDIR = (
> + (os.environb if sys.version_info.major >= 3 else os.environ).get(
> + b'PORTAGE_ACTUAL_DISTDIR', b'') + b'/')
> +
> +
> +def _parse_install_options(
> + options, inprocess_runner_class, subprocess_runner_class):
> + """Parses command line arguments for install command."""
> + parser = argparse.ArgumentParser()
> + parser.add_argument('-g', '--group', default=-1, type=_parse_group)
> + parser.add_argument('-o', '--owner', default=-1, type=_parse_user)
> + # "install"'s --mode option is complicated. So here is partially
> + # supported.
> + parser.add_argument('-m', '--mode', type=_parse_mode)
> + split_options = shlex.split(options)
> + namespace, remaining = parser.parse_known_args(split_options)
> + if remaining:
> + print('Unknown install options: %s, %r' % (options, remaining),
> + file=sys.stderr)
> + if os.environ.get('DOINSSTRICTOPTION', '') == '1':
> + sys.exit(1)
> + print('Continue with falling back to \'install\' '
> + 'command execution, which can be slower.',
> + file=sys.stderr)
> + return subprocess_runner_class(split_options)
> + return inprocess_runner_class(namespace)
> +
> +
> +def _parse_group(group):
> + """Parses gid."""
> + g = grp.getgrnam(group)
> + if g:
> + return g.gr_gid
> + return int(group)
> +
> +
> +def _parse_user(user):
> + """Parses uid."""
> + u = pwd.getpwnam(user)
> + if u:
> + return u.pw_uid
> + return int(user)
> +
> +
> +def _parse_mode(mode):
> + # "install"'s --mode option is complicated. So here is partially
> + # supported.
> + # In Python 3, the prefix of octal int must be '0o' rather than '0'.
> + # So, set base explicitly in that case.
> + return int(mode, 8 if re.search(r'^0[0-7]*$', mode) else 0)
> +
> +
> +def _set_attributes(options, path):
> + """Sets attributes the file/dir at given |path|.
> +
> + Args:
> + options: object which has |owner|, |group| and |mode| fields.
> + |owner| is int value representing uid. Similary |group|
> + represents gid.
> + If -1 is set, just unchanged.
> + |mode| is the bits of permissions.
> + path: File/directory path.
> + """
> + if options.owner != -1 or options.group != -1:
> + os.lchown(path, options.owner, options.group)
> + if options.mode is not None:
> + os.chmod(path, options.mode)
> +
> +
> +class _InsInProcessInstallRunner(object):
> + def __init__(self, parsed_options):
> + self._parsed_options = parsed_options
> + self._copy_xattr = (
> + 'xattr' in os.environ.get('FEATURES', '').split())
> + if self._copy_xattr:
> + self._xattr_exclude = os.environ.get(
> + 'PORTAGE_XATTR_EXCLUDE',
> + 'security.* system.nfs4_acl')
> +
> + def run(self, source, dest_dir):
> + """Installs a file at |source| into |dest_dir| in process."""
> + dest = os.path.join(dest_dir, os.path.basename(source))
> + try:
> + # TODO: Consider to use portage.util.file_copy.copyfile
> + # introduced by
> + # https://gitweb.gentoo.org/proj/portage.git/commit/
> + #   ?id=8ab5c8835931fd9ec098dbf4c5f416eb32e4a3a4
> + # after uprev.
> + shutil.copyfile(source, dest)
> + _set_attributes(self._parsed_options, dest)
> + if self._copy_xattr:
> + movefile._copyxattr(
> + source, dest,
> + exclude=self._xattr_exclude)
> + except Exception:
> + logging.exception(
> + 'Failed to copy file: '
> + '_parsed_options=%r, source=%r, dest_dir=%r',
> + self._parsed_options, source, dest_dir)
> + return False
> + return True
> +
> +
> +class _InsSubprocessInstallRunner(object):
> + def __init__(self, split_options):
> + self._split_options = split_options
> +
> + def run(self, source, dest_dir):
> + """Installs a file at |source| into |dest_dir| by 'install'."""
> + command = ['install'] + self._split_options + [source, dest_dir]
> + return subprocess.call(command) == 0
> +
> +
> +class _DirInProcessInstallRunner(object):
> + def __init__(self, parsed_options):
> + self._parsed_options = parsed_options
> +
> + def run(self, dest):
> + """Installs a dir into |dest| in process."""
> + try:
> + os.makedirs(dest)
> + except OSError as e:
> + if e.errno != errno.EEXIST or not os.path.isdir(dest):
> + raise
> + _set_attributes(self._parsed_options, dest)
> +
> +
> +class _DirSubprocessInstallRunner(object):
> + """Runs real 'install' command to install files or directories."""
> +
> + def __init__(self, split_options):
> + self._split_options = split_options
> +
> + def run(self, dest):
> + """Installs a dir into |dest| by 'install' command."""
> + command = ['install', '-d'] + self._split_options + [dest]
> + subprocess.call(command)
> +
> +
> +class _InstallRunner(object):
> + def __init__(self):
> + self._ins_runner = _parse_install_options(
> + os.environ.get('INSOPTIONS', ''),
> + _InsInProcessInstallRunner,
> + _InsSubprocessInstallRunner)
> + self._dir_runner = _parse_install_options(
> + os.environ.get('DIROPTIONS', ''),
> + _DirInProcessInstallRunner,
> + _DirSubprocessInstallRunner)
> +
> + def install_file(self, source, dest_dir):
> + """Installs a file at |source| into |dest_dir| directory."""
> + return self._ins_runner.run(source, dest_dir)
> +
> + def install_dir(self, dest):
> + """Creates a directory at |dest|."""
> + self._dir_runner.run(dest)
> +
> +
> +def _doins(args, install_runner, relpath, source_root):
> + """Installs a file as if 'install' command runs.
> +
> + Installs a file at |source_root|/|relpath| into |args.dest|/|relpath|.
> + If |args.preserve_symlinks| is set, creates symlink if the source is a
> + symlink.
> +
> + Args:
> + args: parsed arguments. It should have following fields.
> + - preserve_symlinks: bool representing whether symlinks
> + needs to be preserved.
> + - dest: Destination root directory.
> + - insoptions: options for 'install' command.
> + intall_runner: _InstallRunner instance for file install.
> + relpath: Relative path of the file being installed.
> + source_root: Source root directory.
> +
> + Returns: True on success.
> + """
> + source = os.path.join(source_root, relpath)
> + dest = os.path.join(args.dest, relpath)
> + if os.path.islink(source):
> + # Our fake $DISTDIR contains symlinks that should not be
> + # reproduced inside $D. In order to ensure that things like
> + # dodoc "$DISTDIR"/foo.pdf work as expected, we dereference
> + # symlinked files that refer to absolute paths inside
> + # $PORTAGE_ACTUAL_DISTDIR/.
> + try:
> + if (args.preserve_symlinks and
> + not os.readlink(source).startswith(
> + _PORTAGE_ACTUAL_DISTDIR)):
> + linkto = os.readlink(source)
> + shutil.rmtree(dest, ignore_errors=True)
> + os.symlink(linkto, dest)
> + return True
> + except Exception:
> + logging.exception(
> + 'Failed to create symlink: '
> + 'args=%r, relpath=%r, source_root=%r',
> + args, relpath, source_root)
> + return False
> +
> + return install_runner.install_file(source, os.path.dirname(dest))
> +
> +
> +def _parse_args():
> + """Parses the command line arguments."""
> + parser = argparse.ArgumentParser()
> + parser.add_argument(
> + '--recursive', action='store_true',
> + help='If set, installs files recursively. Otherwise, '
> + 'just skips directories.')
> + parser.add_argument(
> + '--preserve_symlinks', action='store_true',
> + help='If set, a symlink will be installed as symlink.')
> + # If helper is dodoc, it changes the behavior for the directory
> + # install without --recursive.
> + parser.add_argument('--helper', help='Name of helper.')
> + parser.add_argument(
> + '--dest',
> + help='Destination where the files are installed.')
> + parser.add_argument(
> + 'sources', nargs='*',
> + help='Source file/directory paths to be installed.')
> +
> + args = parser.parse_args()
> +
> + # Encode back to the original byte stream. Please see
> + # http://bugs.python.org/issue8776.
> + if sys.version_info.major >= 3:
> + args.dest = os.fsencode(args.dest)
> + args.sources = [os.fsencode(source) for source in args.sources]
> +
> + return args
> +
> +
> +def _install_dir(args, install_runner, source):
> + """Installs directory at |source|.
> +
> + Returns:
> + True on success, False on failure, or None on skipped.
> + """
> + if not args.recursive:
> + if args.helper == 'dodoc':
> + print('!!! %s: %s is a directory' % (
> + args.helper, source),
> + file=sys.stderr)
> + return False
> + # Neither success nor fail. Return None to indicate skipped.
> + return None
> +
> + # Strip trailing '/'s.
> + source = source.rstrip(b'/')
> + source_root = os.path.dirname(source)
> + dest_dir = os.path.join(args.dest, os.path.basename(source))
> + install_runner.install_dir(dest_dir)
> +
> + relpath_list = []
> + for dirpath, dirnames, filenames in os.walk(source):
> + for dirname in dirnames:
> + relpath = os.path.relpath(
> + os.path.join(dirpath, dirname),
> + source_root)
> + dest = os.path.join(args.dest, relpath)
> + install_runner.install_dir(dest)
> + relpath_list.extend(
> + os.path.relpath(
> + os.path.join(dirpath, filename), source_root)
> + for filename in filenames)
> +
> + if not relpath_list:
> + # NOTE: Even if only an empty directory is installed here, it
> + # still counts as success, since an empty directory given as
> + # an argument to doins -r should not trigger failure.
> + return True
> + success = True
> + for relpath in relpath_list:
> + if not _doins(args, install_runner, relpath, source_root):
> + success = False
> + return success
> +
> +
> +def main():
> + args = _parse_args()
> + install_runner = _InstallRunner()
> +
> + if not os.path.isdir(args.dest):
> + install_runner.install_dir(args.dest)
> +
> + any_success = False
> + any_failure = False
> + for source in args.sources:
> + if (os.path.isdir(source) and
> + (not args.preserve_symlinks or
> + not os.path.islink(source))):
> + ret = _install_dir(args, install_runner, source)
> + if ret is None:
> + continue
> + if ret:
> + any_success = True
> + else:
> + any_failure = True
> + else:
> + if _doins(
> + args, install_runner,
> + os.path.basename(source),
> + os.path.dirname(source)):
> + any_success = True
> + else:
> + any_failure = True
> +
> + return 0 if not any_failure and any_success else 1
> +
> +
> +if __name__ == '__main__':
> + sys.exit(main())
> diff --git a/bin/ebuild-helpers/doins b/bin/ebuild-helpers/doins
> index 93052c2ea..de559780c 100755
> --- a/bin/ebuild-helpers/doins
> +++ b/bin/ebuild-helpers/doins
> @@ -24,10 +24,10 @@ if [ $# -lt 1 ] ; then
>  fi
>  
>  if [[ "$1" == "-r" ]] ; then
> - DOINSRECUR=y
> + RECURSIVE_OPTION='--recursive'
>   shift
>  else
> - DOINSRECUR=n
> + RECURSIVE_OPTION=''
>  fi
>  
>  if ! ___eapi_has_prefix_variables; then
> @@ -44,116 +44,17 @@ if [[ ${INSDESTTREE#${ED}} != "${INSDESTTREE}" ]]; then
>  fi
>  
>  if ___eapi_doins_and_newins_preserve_symlinks; then
> - PRESERVE_SYMLINKS=y
> + SYMLINK_OPTION='--preserve_symlinks'
>  else
> - PRESERVE_SYMLINKS=n
> + SYMLINK_OPTION=''
>  fi
>  
> -export TMP=$(mktemp -d "${T}/.doins_tmp_XXXXXX")
> -# Use separate directories to avoid potential name collisions.
> -mkdir -p "$TMP"/{1,2}
> +# Use safe cwd, avoiding unsafe import for bug #469338.
> +export __PORTAGE_HELPER_CWD=${PWD}
> +cd "${PORTAGE_PYM_PATH:-/usr/lib/portage/pym}"
>  
> -[[ ! -d ${ED}${INSDESTTREE} ]] && dodir "${INSDESTTREE}"
> -
> -_doins() {
> - local mysrc="$1" mydir="$2" cleanup="" rval
> -
> - if [ -L "$mysrc" ] ; then
> - # Our fake $DISTDIR contains symlinks that should
> - # not be reproduced inside $D. In order to ensure
> - # that things like dodoc "$DISTDIR"/foo.pdf work
> - # as expected, we dereference symlinked files that
> - # refer to absolute paths inside
> - # $PORTAGE_ACTUAL_DISTDIR/.
> - if [ $PRESERVE_SYMLINKS = y ] && \
> - ! [[ $(readlink "$mysrc") == "$PORTAGE_ACTUAL_DISTDIR"/* ]] ; then
> - rm -rf "${ED}$INSDESTTREE/$mydir/${mysrc##*/}" || return $?
> - cp -P "$mysrc" "${ED}$INSDESTTREE/$mydir/${mysrc##*/}"
> - return $?
> - else
> - cp "$mysrc" "$TMP/2/${mysrc##*/}" || return $?
> - mysrc="$TMP/2/${mysrc##*/}"
> - cleanup=$mysrc
> - fi
> - fi
> -
> - install ${INSOPTIONS} "${mysrc}" "${ED}${INSDESTTREE}/${mydir}"
> - rval=$?
> - [[ -n ${cleanup} ]] && rm -f "${cleanup}"
> - [ $rval -ne 0 ] && echo "!!! ${helper}: $mysrc does not exist" 1>&2
> - return $rval
> -}
> -
> -_xdoins() {
> - local -i failed=0
> - while read -r -d $'\0' x ; do
> - _doins "$x" "${x%/*}"
> - ((failed|=$?))
> - done
> - return $failed
> -}
> -
> -success=0
> -failed=0
> -
> -for x in "$@" ; do
> - if [[ $PRESERVE_SYMLINKS = n && -d $x ]] || \
> - [[ $PRESERVE_SYMLINKS = y && -d $x && ! -L $x ]] ; then
> - if [ "${DOINSRECUR}" == "n" ] ; then
> - if [[ ${helper} == dodoc ]] ; then
> - echo "!!! ${helper}: $x is a directory" 1>&2
> - ((failed|=1))
> - fi
> - continue
> - fi
> -
> - while [ "$x" != "${x%/}" ] ; do
> - x=${x%/}
> - done
> - if [ "$x" = "${x%/*}" ] ; then
> - pushd "$PWD" >/dev/null
> - else
> - pushd "${x%/*}" >/dev/null
> - fi
> - x=${x##*/}
> - x_orig=$x
> - # Follow any symlinks recursively until we've got
> - # a normal directory for 'find' to traverse. The
> - # name of the symlink will be used for the name
> - # of the installed directory, as discussed in
> - # bug #239529.
> - while [ -L "$x" ] ; do
> - pushd "$(readlink "$x")" >/dev/null
> - x=${PWD##*/}
> - pushd "${PWD%/*}" >/dev/null
> - done
> - if [[ $x != $x_orig ]] ; then
> - mv "$x" "$TMP/1/$x_orig"
> - pushd "$TMP/1" >/dev/null
> - fi
> - find "$x_orig" -type d -exec dodir "${INSDESTTREE}/{}" \;
> - find "$x_orig" \( -type f -or -type l \) -print0 | _xdoins
> - if [[ ${PIPESTATUS[1]} -eq 0 ]] ; then
> - # NOTE: Even if only an empty directory is installed here, it
> - # still counts as success, since an empty directory given as
> - # an argument to doins -r should not trigger failure.
> - ((success|=1))
> - else
> - ((failed|=1))
> - fi
> - if [[ $x != $x_orig ]] ; then
> - popd >/dev/null
> - mv "$TMP/1/$x_orig" "$x"
> - fi
> - while popd >/dev/null 2>&1 ; do true ; done
> - else
> - _doins "${x}"
> - if [[ $? -eq 0 ]] ; then
> - ((success|=1))
> - else
> - ((failed|=1))
> - fi
> - fi
> -done
> -rm -rf "$TMP"
> -[[ $failed -ne 0 || $success -eq 0 ]] && { __helpers_die "${helper} failed"; exit 1; } || exit 0
> +"${PORTAGE_PYTHON:-/usr/bin/python}" \
> + "${PORTAGE_BIN_PATH:-/usr/lib/portage/bin}"/doins.py \
> + ${RECURSIVE_OPTION} ${SYMLINK_OPTION} \
> + --helper "${helper}" --dest "${ED}${INSDESTTREE}" "$@" || \
> +{ __helpers_die "${helper} failed"; exit 1; }

To be honest, I don't like the idea of using more Python inside ebuild
helpers. But if you're sure this is safe and not going to collide with
ebuilds doing random stuff with Python, feel free to proceed with it.

--
Best regards,
Michał Górny