On 07/06/13 18:04, Richard Maw wrote:
This removes staging areas and extracted chunks from --tempdir.
It then asks the local artifact cache what artifacts it
has and how old they are, removing all sources older than
--cachedir-artifact-delete-older-than, and may delete other sources that
are older than --cachedir-artifact-keep-younger-than if it still needs
to make space.
---
morphlib/plugins/gc_plugin.py | 158 +++++++++++++++++++++++++++++++++++++++++
without-test-modules | 1 +
2 files changed, 159 insertions(+)
create mode 100644 morphlib/plugins/gc_plugin.py
diff --git a/morphlib/plugins/gc_plugin.py b/morphlib/plugins/gc_plugin.py
new file mode 100644
index 0000000..cc82cae
--- /dev/null
+++ b/morphlib/plugins/gc_plugin.py
@@ -0,0 +1,158 @@
+# Copyright (C) 2013 Codethink Limited
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; version 2 of the License.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+
+
+import logging
+import os
+import shutil
+import time
+
+import cliapp
+
+import morphlib
+
+
+class GCPlugin(cliapp.Plugin):
+
+ def enable(self):
+ self.app.add_subcommand('gc', self.gc,
+ arg_synopsis='')
+ self.app.settings.integer(['cachedir-artifact-delete-older-than'],
+ 'always delete artifacts older than this '
+ 'period in seconds, (default: 1 week)',
+ metavar='PERIOD',
+ group="Storage Options",
+ default=(60*60*24*7))
+ self.app.settings.integer(['cachedir-artifact-keep-younger-than'],
+ 'allow deletion of artifacts older than '
+ 'this period in seconds, (default: 1 day)',
+ metavar='PERIOD',
+ group="Storage Options",
+ default=(60*60*24))
I wonder if we really need those two settings. Can't we simply order the
cached artifact list by modification date, and then keep deleting artifacts
until we have freed enough disk space (i.e. until the free disk space on the
filesystem where cachedir lives is higher than
self.app.settings['cachedir-min-space'])?
Or keep just one setting and remove all artifacts older than that setting.
If we still don't have enough space, we keep walking the list of the
remaining artifacts and delete them until we have enough free space.
+
+ def disable(self):
+ pass
+
+ def gc(self, args):
+ '''Make space by removing unused files.
+
+ This removes all artifacts older than
+ --cachedir-artifact-delete-older-than, and may delete artifacts
+ older than --cachedir-artifact-keep-younger-than if it still
+ needs to make space.
+
+ This removes extracted chunks and staging areas for failed builds
+ from the directory specified by --tempdir.
+
+ '''
+
+ tempdir = self.app.settings['tempdir']
+ cachedir = self.app.settings['cachedir']
+ tempdir_min_space, cachedir_min_space = \
+ morphlib.util.unify_space_requirements(
+ tempdir, self.app.settings['tempdir-min-space'],
+ cachedir, self.app.settings['cachedir-min-space'])
+
+ self.cleanup_tempdir(tempdir, tempdir_min_space)
+ self.cleanup_cachedir(cachedir, cachedir_min_space)
+
+ def cleanup_tempdir(self, temp_path, min_space):
+ self.app.status(msg='Cleaning up temp dir %(temp_path)s',
+ temp_path=temp_path, chatty=True)
+ for subdir in ('failed', 'chunks'):
I think we should also clean staging and deployments. It is true
that failed builds are moved to the failed dir, but that is not the
case if morph crashes due to an unexpected error, and when we
are hacking on morph this happens occasionally. Deployments
should also be cleaned, because currently we don't remove failed
deployments in all configuration extensions.
+ if morphlib.util.get_bytes_free_in_path(temp_path) >=
min_space:
+ self.app.status(msg='Not Removing subdirectory '
+ '%(subdir)s, enough space already cleared',
+ subdir=os.path.join(temp_path, subdir),
+ chatty=True)
+ break
+ self.app.status('Removing temp subdirectory: %(subdir)s',
+ subdir=subdir)
+ path = os.path.join(temp_path, subdir)
+ if os.path.exists(path):
+ shutil.rmtree(path)
+
+ def calculate_delete_range(self):
+ now = time.time()
+ always_delete_age = \
+ now - self.app.settings['cachedir-artifact-delete-older-than']
+ may_delete_age = \
+ now - self.app.settings['cachedir-artifact-keep-younger-than']
+ return always_delete_age, may_delete_age
+
+ def find_deletable_artifacts(self, lac, max_age, min_age):
+ '''Get a list of cache keys in order of how old they
are.'''
+ contents = list(lac.list_contents())
+ always = set(cachekey
+ for cachekey, artifacts, mtime in contents
+ if mtime < max_age)
+ maybe = ((cachekey, mtime)
+ for cachekey, artifacts, mtime in contents
+ if max_age <= mtime < min_age)
+ return always, [cachekey for cachekey, mtime
+ in sorted(maybe, key=lambda x: x[1])]
+
+ def cleanup_cachedir(self, cache_path, min_space):
+ def sufficient_free():
+ free = morphlib.util.get_bytes_free_in_path(cache_path)
+ return (free >= min_space)
+ if sufficient_free():
+ self.app.status(msg='Not cleaning up cachedir, '
+ 'sufficient space already cleared',
+ chatty=True)
+ return
+ lac = morphlib.localartifactcache.LocalArtifactCache(cache_path)
+ max_age, min_age = self.calculate_delete_range()
+ logging.debug('Must remove artifacts older than timestamp %d'
+ % max_age)
+ always_delete, may_delete = \
+ self.find_deletable_artifacts(lac, max_age, min_age)
+ removed = 0
+ source_count = len(always_delete) + len(may_delete)
+ logging.debug('Must remove artifacts %s' % repr(always_delete))
+ logging.debug('Can remove artifacts %s' % repr(may_delete))
+
+ # Remove all old artifacts
+ for cachekey in always_delete:
+ self.app.status(msg='Removing source %(cachekey)s',
+ cachekey=cachekey, chatty=True)
+ lac.remove(cachekey)
+ removed += 1
+
+ # Maybe remove remaining middle-aged artifacts
+ for cachekey in may_delete:
+ if sufficient_free():
+ self.app.status(msg='Finished cleaning up cachedir with '
+ '%(remaining)d old sources remaining',
+ remaining=(source_count - removed),
+ chatty=True)
+ break
+ self.app.status(msg='Removing source %(cachekey)s',
+ cachekey=cachekey, chatty=True)
+ lac.remove(cachekey)
+ removed += 1
+
+ if sufficient_free():
+ self.app.status(msg='Made sufficient space in %(cache_path)s '
+ 'after removing %(removed)d sources',
+ removed=removed, cache_path=cache_path)
+ return
+ self.app.status(msg='Unable to clear enough space in %(cache_path)s '
+ 'after removing %(removed)d sources. Please '
+ 'reduce cachedir-artifact-keep-younger-than, '
+ 'clear space from elsewhere, enlarge the disk '
+ 'or reduce cachedir-min-space.',
+ cache_path=cache_path, removed=removed,
+ error=True)
diff --git a/without-test-modules b/without-test-modules
index 89b0bd8..f143eb4 100644
--- a/without-test-modules
+++ b/without-test-modules
@@ -26,3 +26,4 @@ morphlib/plugins/__init__.py
morphlib/writeexts.py
morphlib/plugins/copy-artifacts_plugin.py
morphlib/plugins/trovectl_plugin.py
+morphlib/plugins/gc_plugin.py