#!/usr/bin/env python3
# -*- coding: utf-8 -*-
# repo.py: Automatic management tool for an arch repo.
# This file is part of Buildbot by JerryXiao
# Directory structure of the repo:
# buildbot -- buildbot (git)
# buildbot/repo -- repo root
# /updates/ -- new packages goes in here
# /recycled/ -- litter bin
# /archive/ -- archive dir, old packages goes in here
# /www/ -- http server root
# /www/archive => /archive -- archive dir for users
# /www/aarch64 -- packages for "aarch64"
# /www/any -- packages for "any"
# /www/armv7h -- packages for "armv7h" (No build bot)
# /www/x86_64 -- packages for "x86_64"
# /www/robots.txt => /r_r_n/r.txt -- robots.txt
import os
from pathlib import Path
from shutil import copyfile as __copy_file
import logging
from utils import bash, Pkg, get_pkg_details_from_name, \
print_exc_plus
from time import time
from config import REPO_NAME, PKG_COMPRESSION, ARCHS, REPO_CMD, \
REPO_REMOVE_CMD
from shared_vars import PKG_SUFFIX, PKG_SIG_SUFFIX
# Resolve the repo working directory (<this file's dir>/repo), create it if
# missing, and make it the process-wide cwd: every relative path used below
# (updates/, archive/, recycled/, www/...) is rooted here.
abspath = os.path.abspath(__file__)
repocwd = Path(abspath).parent / 'repo'
repocwd.mkdir(mode=0o755, exist_ok=True)
os.chdir(repocwd)
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
def symlink(dst, src, exist_ok=True):
    """Create a symlink at *dst* pointing to the string target *src*.

    If *dst* already exists as a symlink and exist_ok is True, the
    FileExistsError is swallowed; any other pre-existing entry (or
    exist_ok=False) re-raises.
    """
    assert issubclass(type(dst), os.PathLike) and type(src) is str
    try:
        dst.symlink_to(src)
    except FileExistsError:
        # Tolerate an existing entry only when it really is a symlink
        # and the caller opted in; a plain file in the way is an error.
        if dst.is_symlink() and exist_ok:
            return
        raise
def copyfile(src, dst):
    """Copy *src* to *dst* without following symlinks: a symlink source
    is reproduced as a symlink, not as the file it points to."""
    __copy_file(str(src), str(dst), follow_symlinks=False)
def prepare_env():
    """Create the working-directory layout (updates/, archive/, recycled/
    and one www/ subdirectory per architecture) plus the archive symlink."""
    wanted = [Path('updates/'), Path('archive/'), Path('recycled/')]
    wanted += [Path('www/') / arch for arch in ARCHS]
    for directory in wanted:
        directory.mkdir(mode=0o755, exist_ok=True, parents=True)
    # Expose the archive to http clients via www/archive.
    symlink(Path('www/archive'), '../archive')
prepare_env()
def repo_add(fpaths):
    """Register the given package files with the pacman database that
    lives in their directory (runs REPO_CMD); returns the command output.

    All paths must be package files living in the same directory.
    """
    assert type(fpaths) is list
    parent = fpaths[0].parent
    # Every path must share a directory with the first one.
    assert not [None for p in fpaths if p.parent != parent]
    for p in fpaths:
        assert issubclass(type(p), os.PathLike) and p.name.endswith(PKG_SUFFIX)
    dbpath = parent / f'{REPO_NAME}.db.tar.gz'
    pkglist = " ".join(str(p) for p in fpaths)
    return bash(f'{REPO_CMD} {dbpath} {pkglist}', RUN_CMD_TIMEOUT=5*60)
def repo_remove(fpaths):
    """Recycle the given package files (and their .sig companions), then
    run REPO_REMOVE_CMD to drop the package names from the database.

    All paths must be package files living in the same directory.
    """
    assert type(fpaths) is list
    parent = fpaths[0].parent
    assert not [None for p in fpaths if p.parent != parent]
    for p in fpaths:
        assert issubclass(type(p), os.PathLike) and p.name.endswith(PKG_SUFFIX)
    dbpath = parent / f'{REPO_NAME}.db.tar.gz'
    for p in fpaths:
        throw_away(p)
        sigpath = p.parent / f'{p.name}.sig'
        # Path.exists() follows symlinks, so also probe is_symlink() to
        # catch a dangling signature link.
        if sigpath.exists() or sigpath.is_symlink():
            throw_away(sigpath)
    pkgnames = [get_pkg_details_from_name(p.name).pkgname for p in fpaths]
    return bash(f'{REPO_REMOVE_CMD} {dbpath} {" ".join(pkgnames)}', RUN_CMD_TIMEOUT=5*60)
def throw_away(fpath):
    """Move *fpath* into the recycled/ litter bin, tagging the new name
    with the current unix time so repeated recycles never clash."""
    assert issubclass(type(fpath), os.PathLike)
    target = Path('recycled') / f"{fpath.name}_{time()}"
    assert not target.exists()
    logger.warning('Throwing away %s', fpath)
    fpath.rename(target)
def archive_pkg(fpath):
    """Move a package file into archive/, replacing any older archived
    copy of the same name; symlinks are recycled instead of archived."""
    assert issubclass(type(fpath), os.PathLike)
    if fpath.is_symlink():
        # A link just points at a real package elsewhere; only keep
        # real files in the archive.
        logger.warning('Not archiving symlink %s', fpath)
        throw_away(fpath)
        return
    target = Path('archive') / fpath.name
    if target.exists():
        logger.warning(f'Removing old archive {target}')
        throw_away(target)
    logger.warning('Archiving %s', fpath)
    fpath.rename(target)
def filter_old_pkg(fpaths, keep_new=1, archive=False, recycle=False):
    '''
    Accepts a list of paths (must be in the same dir)
    return a tuple of list of paths
    ([new1, new2], [old1, old2])
    packages are arranged from new to old, one by one.
    new: pkga-v8, pkga-v7, pkgb-v5, pkgb-v4
    old: pkga-v6, pkga-v5, pkgb-v3, pkgb-v2
    (assume keep_new=2)
    When archive=True old packages are moved to archive/; when
    recycle=True they are moved to recycled/ (mutually exclusive).
    A matching .sig file follows its package.
    '''
    if not fpaths:
        return (list(), list())
    assert type(fpaths) is list
    for fpath in fpaths:
        assert issubclass(type(fpath), os.PathLike) and \
                fpath.name.endswith(PKG_SUFFIX)
    assert not (archive and recycle)
    assert not [None for fpath in fpaths if fpath.parent != fpaths[0].parent]
    new_pkgs = list()
    old_pkgs = list()
    pkgs_vers = dict()
    for fpath in fpaths:
        pkg = get_pkg_details_from_name(fpath.name)
        # Key on the (pkgname, arch) tuple: the previous plain string
        # concatenation pkgname+arch could collide for different
        # name/arch splits.
        pkgs_vers.setdefault((pkg.pkgname, pkg.arch), list()).append(pkg)
    for family in pkgs_vers.values():
        # new packages first (relies on Pkg ordering by version)
        family = sorted(family, reverse=True)
        # Slicing handles families shorter than keep_new naturally.
        new_pkgs += family[:keep_new]
        old_pkgs += family[keep_new:]
    for pkg in old_pkgs:
        fullpath = fpaths[0].parent / pkg.fname
        sigpath = fpaths[0].parent / f'{pkg.fname}.sig'
        if archive:
            archive_pkg(fullpath)
            if sigpath.exists():
                archive_pkg(sigpath)
        elif recycle:
            throw_away(fullpath)
            if sigpath.exists():
                throw_away(sigpath)
    return (new_pkgs, old_pkgs)
def _clean_archive(keep_new=3):
    """Prune archive/, keeping only the keep_new most recent versions of
    each package; older versions are recycled. Returns True."""
    logger.info('starting clean')
    archive_dir = Path('archive')
    pkg_files = [p for p in archive_dir.iterdir() if p.name.endswith(PKG_SUFFIX)]
    filter_old_pkg(pkg_files, keep_new=keep_new, recycle=True)
    logger.info('finished clean')
    return True
def _regenerate(target_archs=ARCHS, just_symlink=False):
    '''Rebuild the www/ tree: symlink arch=any packages into every arch
    dir, sanity-check every package (signature present, correct arch
    directory), then re-run repo-add on what remains.

    target_archs: architectures to process.
    just_symlink: only create the 'any' symlinks, skip the repo-add pass.
    Returns True on completion.
    '''
    if just_symlink:
        logger.info('starting regenerate symlinks %s', target_archs)
    else:
        logger.info('starting regenerate %s', target_archs)
    rn = REPO_NAME
    repo_files = (f"{rn}.db {rn}.db.tar.gz {rn}.db.tar.gz.old "
                  f"{rn}.files {rn}.files.tar.gz {rn}.files.tar.gz.old")
    repo_files = repo_files.split(' ')
    # The .old backups are optional; the rest must exist per arch dir.
    repo_files_essential = [fname for fname in repo_files if not fname.endswith('.old')]
    assert repo_files_essential
    # make symlink for arch=any pkgs
    basedir = Path('www') / 'any'
    if basedir.exists():
        for pkgfile in basedir.iterdir():
            if pkgfile.name.endswith(PKG_SUFFIX) and \
               get_pkg_details_from_name(pkgfile.name).arch == 'any':
                sigfile = Path(f"{pkgfile}.sig")
                if sigfile.exists():
                    logger.info(f'Creating symlink for {pkgfile}, {sigfile}')
                    for arch in target_archs:
                        if arch == 'any':
                            continue
                        symlink(pkgfile.parent / '..' / arch / pkgfile.name, f'../any/{pkgfile.name}')
                        symlink(sigfile.parent / '..' / arch / sigfile.name, f'../any/{sigfile.name}')
    else:
        # bugfix: this branch previously interpolated `arch`, which is
        # undefined at this point (NameError); report the actual dir.
        logger.error(f'{basedir} dir does not exist!')
    if just_symlink:
        return True
    # run repo_add
    for arch in target_archs:
        basedir = Path('www') / arch
        repo_files_count = list()
        pkgs_to_add = list()
        if not basedir.exists():
            logger.error(f'{arch} dir does not exist!')
            continue
        filter_old_pkg([f for f in basedir.iterdir() if f.name.endswith(PKG_SUFFIX)],
                       keep_new=1, recycle=True)
        pkgfiles = [f for f in basedir.iterdir()]
        for pkgfile in pkgfiles:
            if pkgfile.name in repo_files:
                repo_files_count.append(pkgfile.name)
                continue
            if pkgfile.name.endswith(PKG_SIG_SUFFIX):
                # Orphan signature: its package has vanished.
                if not Path(str(pkgfile)[:-4]).exists() and pkgfile.exists():
                    logger.warning(f"{pkgfile} has no package!")
                    throw_away(pkgfile)
                continue
            elif pkgfile.name.endswith(PKG_SUFFIX):
                sigfile = Path(f"{pkgfile}.sig")
                if not sigfile.exists():
                    logger.warning(f"{pkgfile} has no signature!")
                    throw_away(pkgfile)
                    continue
                realarch = get_pkg_details_from_name(pkgfile.name).arch
                if realarch != 'any' and realarch != arch:
                    # Package landed in the wrong arch dir; move it home.
                    # NOTE(review): newpath lives in a different directory,
                    # while repo_add asserts all paths share one parent —
                    # confirm a mixed pkgs_to_add list is intended.
                    newpath = pkgfile.parent / '..' / realarch / pkgfile.name
                    newSigpath = Path(f'{newpath}.sig')
                    logger.info(f'Moving {pkgfile} to {newpath}, {sigfile} to {newSigpath}')
                    assert not (newpath.exists() or newSigpath.exists())
                    pkgfile.rename(newpath)
                    sigfile.rename(newSigpath)
                    pkgs_to_add.append(newpath)
                else:
                    pkgs_to_add.append(pkgfile)
            else:
                logger.warning(f"{pkgfile} is garbage!")
                throw_away(pkgfile)
        if pkgs_to_add:
            logger.info("repo-add: %s", repo_add(pkgs_to_add))
        else:
            logger.warning('repo-add: Nothing to do in %s', arch)
        for rfile in repo_files_essential:
            if rfile not in repo_files_count:
                logger.error(f'{rfile} does not exist in {arch}!')
    logger.info('finished regenerate')
    return True
def _update(overwrite=False):
    '''Push every signed package waiting in updates/ into the live repo.

    For each package file that has a matching .sig, the pair is copied
    into www/<arch>/ and then archived, and the per-arch database is
    updated via repo_add. Files without a signature and any other stray
    files left in updates/ are recycled.
    overwrite: when False, an existing target in www/ trips an assert;
    when True it is overwritten with a warning.
    Returns True on completion.
    '''
    logger.info('starting update')
    update_path = Path('updates')
    assert update_path.exists()
    pkgs_to_add = dict()
    # First archive superseded versions sitting in updates/ itself.
    filter_old_pkg([f for f in update_path.iterdir() if f.name.endswith(PKG_SUFFIX)],
                   keep_new=1, archive=True)
    for pkg_to_add in update_path.iterdir():
        if pkg_to_add.is_dir():
            continue
        else:
            if pkg_to_add.name.endswith(PKG_SUFFIX):
                sigfile = Path(f"{pkg_to_add}.sig")
                if sigfile.exists():
                    arch = get_pkg_details_from_name(pkg_to_add.name).arch
                    pkg_nlocation = pkg_to_add.parent / '..' / 'www' / arch / pkg_to_add.name
                    sig_nlocation = Path(f'{pkg_nlocation}.sig')
                    logger.info(f'Copying {pkg_to_add} to {pkg_nlocation}, {sigfile} to {sig_nlocation}')
                    if overwrite:
                        for nlocation in (pkg_nlocation, sig_nlocation):
                            if nlocation.exists():
                                logger.warning(f'Overwriting {nlocation}')
                    else:
                        # Without overwrite an existing target is fatal.
                        assert not (pkg_nlocation.exists() or sig_nlocation.exists())
                    copyfile(pkg_to_add, pkg_nlocation)
                    copyfile(sigfile, sig_nlocation)
                    archive_pkg(pkg_to_add)
                    archive_pkg(sigfile)
                    if arch == 'any':
                        # An 'any' package is registered in every arch db
                        # (note: `arch` is deliberately rebound here).
                        for arch in ARCHS:
                            pkg_nlocation = pkg_to_add.parent / '..' / 'www' / arch / pkg_to_add.name
                            pkgs_to_add.setdefault(arch, list()).append(pkg_nlocation)
                    else:
                        pkgs_to_add.setdefault(arch, list()).append(pkg_nlocation)
                else:
                    logger.warning(f'{pkg_to_add} has no signature!')
                    throw_away(pkg_to_add)
    if 'any' in pkgs_to_add:
        # Create the www/<arch>/ symlinks for new 'any' packages before
        # running repo-add on them.
        _regenerate(target_archs=ARCHS, just_symlink=True)
    for arch in pkgs_to_add:
        logger.info("repo-add: %s", repo_add(pkgs_to_add[arch]))
    # Everything left in updates/ (orphan sigs, junk) is garbage now.
    for other in update_path.iterdir():
        if other.is_dir():
            continue
        else:
            logger.warning(f"{other} is garbage!")
            throw_away(other)
    logger.info('finished update')
    return True
def _remove(pkgnames, target_archs=None):
    '''Remove the named packages from the repo database of each arch.

    pkgnames: non-empty list of package-name strings.
    target_archs: arches to operate on; defaults to every arch except
    'any'. Passing exactly ['any'] expands to all ARCHS.
    Returns True on completion.
    '''
    # Avoid the mutable-default-argument pitfall: compute the default
    # per call instead of once at definition time.
    if target_archs is None:
        target_archs = [a for a in ARCHS if a != 'any']
    assert type(pkgnames) is list and pkgnames
    assert not [None for s in pkgnames if not (type(s) is str)]
    logger.info('starting remove %s for %s', pkgnames, target_archs)
    if len(target_archs) == 1 and target_archs[0] == 'any':
        target_archs = ARCHS
    else:
        assert 'any' not in target_archs
    for arch in target_archs:
        remove_pkgs = list()
        basedir = Path('www') / arch
        for fpath in basedir.iterdir():
            if fpath.name.endswith(PKG_SUFFIX) and \
               get_pkg_details_from_name(fpath.name).pkgname in pkgnames:
                remove_pkgs.append(fpath)
        if remove_pkgs:
            logger.info("repo-remove: %s", repo_remove(remove_pkgs))
        else:
            logger.warning(f'Nothing to remove in {arch}')
    logger.info('finished remove')
    return True
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
    import argparse
    try:
        parser = argparse.ArgumentParser(description='Automatic management tool for an arch repo.')
        parser.add_argument('-a', '--arch', nargs='?', default=False, help='arch to regenerate, split by comma, defaults to all')
        parser.add_argument('-o', '--overwrite', action='store_true', help='overwrite when updating existing packages')
        parser.add_argument('-u', '--update', action='store_true', help='get updates from updates dir, push them to the repo')
        parser.add_argument('-r', '--regenerate', action='store_true', help='regenerate the whole package database')
        parser.add_argument('-R', '--remove', nargs='?', default=False, help='remove comma split packages from the database')
        parser.add_argument('-c', '--clean', action='store_true', help='clean archive, keep 3 recent versions')
        args = parser.parse_args()
        # bugfix: with nargs='?' a bare `-a`/`-R` yields None (not the
        # False default), and `x is not False` then crashed on
        # None.split(','); only split when a real string was supplied.
        arch = args.arch
        arch = arch.split(',') if isinstance(arch, str) else None
        remove_pkgs = args.remove
        remove_pkgs = remove_pkgs.split(',') if isinstance(remove_pkgs, str) else None
        if arch is not None:
            assert not [None for a in arch if a not in ARCHS]  # ensure arch is a subset of ARCHS
        if args.update:
            _update(overwrite=args.overwrite)
        elif args.regenerate:
            if arch:
                _regenerate(target_archs=arch)
            else:
                _regenerate()
        elif args.clean:
            _clean_archive(keep_new=3)
        elif remove_pkgs:
            if arch:
                _remove(remove_pkgs, target_archs=arch)
            else:
                _remove(remove_pkgs)
        else:
            parser.error("Please choose an action")
    except Exception:
        print_exc_plus()
        parser.exit(status=1)