##// END OF EJS Templates
scm: git: performance improvements in fetching revisions (#8857, #9472)...
scm: git: performance improvements in fetching revisions (#8857, #9472) Parse a revision for a given branch only if we haven't parsed it for any branch before. Moved the db check for existing revisions into a grouped search. Search for many revisions at once: this reduces db load. Revisions are grouped into sets of 100. This is to improve memory consumption. There will be just one query for each set of 100 instead of 100 separate queries. The above two methods significantly increase parsing speed. Test case was a git repo with 6000+ commits on a master branch, and several other branches originating from master. Speed improved from 1.4h to 18min. Contributed by Gergely Fábián. git-svn-id: svn+ssh://rubyforge.org/var/svn/redmine/trunk@9144 e93f8b46-1217-0410-a6f0-8f06a7374b81

File last commit:

r8856:55a8087f674b
r9024:999a4ba30d7b
Show More
mercurial.rb
159 lines | 5.5 KiB | text/x-ruby | RubyLexer
# Redmine - project management software
# Copyright (C) 2006-2011 Jean-Philippe Lang
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
require 'redmine/scm/adapters/mercurial_adapter'
class Repository::Mercurial < Repository
# Changesets belonging to this repository, newest first.
# The ordering key is the changeset table's id column.
has_many(:changesets,
         :foreign_key => 'repository_id',
         :order => "#{Changeset.table_name}.id DESC")

# root_url is excluded from mass assignment.
attr_protected :root_url

# A repository without a url is not valid.
validates_presence_of :url

# Number of revisions requested from the SCM per batch in fetch_changesets.
FETCH_AT_ONCE = 100
# Returns the human-readable name of an attribute.
#
# attribute_key_name - attribute name (anything responding to to_s).
# args               - passed through to the parent implementation unchanged.
#
# The "url" attribute is looked up under the "path_to_repository" key; every
# other attribute is looked up under its own name.
def self.human_attribute_name(attribute_key_name, *args)
  key = attribute_key_name.to_s
  key = "path_to_repository" if key == "url"
  super(key, *args)
end
# Returns the adapter class used for this repository type.
def self.scm_adapter_class
  Redmine::Scm::Adapters::MercurialAdapter
end
# Returns the name of this SCM as a string.
def self.scm_name
  'Mercurial'
end
# Always true: this repository type reports support for directory revisions.
def supports_directory_revisions?
  true
end
# Always true: this repository type reports support for the revision graph.
def supports_revision_graph?
  true
end
# Returns the log encoding name for this repository type; always 'UTF-8'.
def repo_log_encoding
  'UTF-8'
end
# Returns the readable identifier of a Mercurial changeset, written as
# "<revision>:<scmid>".
#
# changeset - object responding to revision and scmid.
def self.format_changeset_identifier(changeset)
  revision = changeset.revision
  node = changeset.scmid
  "#{revision}:#{node}"
end
# Returns the identifier of a Mercurial changeset, which is its scmid.
#
# changeset - object responding to scmid.
def self.changeset_identifier(changeset)
  changeset.scmid
end
# Formats a pair of revisions for a diff by delegating to the parent
# implementation.
#
# cs    - first changeset, passed through unchanged.
# cs_to - second changeset, passed through unchanged.
# sep   - accepted for interface compatibility but not used: a single space
#         is always handed to the parent implementation as the separator.
def diff_format_revisions(cs, cs_to, sep=':')
  separator = ' '
  super(cs, cs_to, separator)
end
# Finds a changeset by revision number or by the beginning of its hash.
#
# name - revision number, node hash, or leading part of a node hash;
#        converted with to_s.
#
# A name made only of digits and at most 8 characters long is first looked up
# in the revision column; any other name is first looked up as an exact scmid.
# When that lookup finds nothing, the name is tried as a prefix of scmid.
#
# Returns the changeset found, or nil when name is blank or nothing matches.
def find_changeset_by_name(name)
  return nil if name.blank?
  s = name.to_s
  if /[^\d]/ =~ s || s.size > 8
    exact = changesets.find(:first, :conditions => ['scmid = ?', s])
  else
    exact = changesets.find(:first, :conditions => ['revision = ?', s])
  end
  return exact if exact
  # Last ditch: prefix match on the hash. LIKE wildcards in the name are
  # escaped so that a name containing % or _ is matched literally instead of
  # matching arbitrary changesets.
  prefix = s.gsub(%r{[%_\\]}) { |ch| "\\#{ch}" }
  changesets.find(:first,
                  :conditions => ['scmid LIKE ? ESCAPE ?', "#{prefix}%", '\\'])
end
# Returns the latest changesets for +path+, sorted by changeset id descending,
# with the associated user included.
#
# path  - path handed to latest_changesets_cond.
# rev   - revision handed to latest_changesets_cond.
# limit - maximum number of changesets to return (default 10).
#
# The has_many declaration already orders by id DESC, so the explicit :order
# here should not be needed. It is kept because the MySQL tests fail without
# it, while SQLite3 and PostgreSQL pass. Possibly a MySQL bug.
def latest_changesets(path, rev, limit=10)
  finder_options = {
    :include => :user,
    :conditions => latest_changesets_cond(path, rev, limit),
    :limit => limit,
    :order => "#{Changeset.table_name}.id DESC"
  }
  changesets.find(:all, finder_options)
end
# Builds the :conditions value used by latest_changesets.
#
# path  - repository path to restrict to; blank means no path restriction.
# rev   - named branch, tag, revision number or hash; may be nil.
# limit - number of changesets the caller asks for; used here only to size
#         the request for branch nodes.
#
# Returns an array [sql, *bind_values], or nil when no restriction applies.
def latest_changesets_cond(path, rev, limit)
  clauses = []
  binds = []

  if scm.branchmap.member?(rev)
    # A Mercurial named branch is *stable* in each revision, so a named branch
    # can be stored in the database. Mercurial also provides *bookmarks*,
    # which are the equivalent of git branches, but they are not implemented.
    clauses << "#{Changeset.table_name}.scmid IN (?)"
    # Revisions of the root directory and of a sub directory are not equal,
    # so a correct limit would require fetching all revisions, which is very
    # heavy. Mercurial does not track directories, so "hg log DIR" is very
    # heavy as well. Five times the limit is requested when a path is given.
    node_limit = path.blank? ? limit : limit * 5
    binds << scm.nodes_in_branch(rev, :limit => node_limit)
  else
    # A tag is resolved through the tag map; anything else is looked up as is.
    newest = rev ? find_changeset_by_name(scm.tagmap[rev] || rev) : nil
    if newest
      clauses << "#{Changeset.table_name}.id <= ?"
      binds << newest.id
    end
  end

  unless path.blank?
    changes = Change.table_name
    rooted = path.with_leading_slash
    clauses << "EXISTS (SELECT * FROM #{changes}\n" \
               "WHERE #{changes}.changeset_id = #{Changeset.table_name}.id\n" \
               "AND (#{changes}.path = ?\n" \
               "OR #{changes}.path LIKE ? ESCAPE ?))"
    # Exact path, then everything below it; LIKE wildcards in the path are
    # escaped with a backslash, which is also passed as the ESCAPE character.
    binds << rooted
    binds << "#{rooted.gsub(%r{[%_\\]}) { |ch| "\\#{ch}" }}/%"
    binds << '\\'
  end

  return nil if clauses.empty?
  [clauses.join(' AND '), *binds]
end
private :latest_changesets_cond
# Imports the revisions that exist in the SCM but not yet in the database.
#
# Revisions following the latest stored changeset are read in batches of
# FETCH_AT_ONCE, with one database transaction per batch. For each revision a
# Changeset is created, its changes are created from the revision's paths, and
# its parents are set to the changesets found for the revision's parent names.
#
# Does nothing when the SCM returns no info, or when the latest stored
# revision is not older than the SCM's last revision.
def fetch_changesets
  # Ask the SCM once: the nil check and the revision read must refer to the
  # same result.
  info = scm.info
  return if info.nil?
  scm_rev = info.lastrev.revision.to_i
  db_rev = latest_changeset ? latest_changeset.revision.to_i : -1
  return unless db_rev < scm_rev # already up-to-date

  logger.debug "Fetching changesets for repository #{url}" if logger
  (db_rev + 1).step(scm_rev, FETCH_AT_ONCE) do |first|
    # The last batch is clipped to the SCM's last revision.
    last = [first + FETCH_AT_ONCE - 1, scm_rev].min
    transaction do
      scm.each_revision('', first, last) do |re|
        cs = Changeset.create(:repository => self,
                              :revision => re.revision,
                              :scmid => re.scmid,
                              :committer => re.author,
                              :committed_on => re.time,
                              :comments => re.message)
        re.paths.each { |e| cs.create_change(e) }
        unless re.parents.nil?
          # Parent names without a stored changeset are dropped.
          cs.parents = re.parents.collect { |rp| find_changeset_by_name(rp) }.compact
        end
      end
    end
  end
end
end