##// END OF EJS Templates
scm: git: performance improvements in fetching revisions (#8857, #9472)...
Toshi MARUYAMA -
r9024:999a4ba30d7b
parent child
Show More
@@ -1,233 +1,265
1 # Redmine - project management software
1 # Redmine - project management software
2 # Copyright (C) 2006-2011 Jean-Philippe Lang
2 # Copyright (C) 2006-2011 Jean-Philippe Lang
3 # Copyright (C) 2007 Patrick Aljord patcito@ŋmail.com
3 # Copyright (C) 2007 Patrick Aljord patcito@ŋmail.com
4 #
4 #
5 # This program is free software; you can redistribute it and/or
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
8 # of the License, or (at your option) any later version.
9 #
9 #
10 # This program is distributed in the hope that it will be useful,
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
13 # GNU General Public License for more details.
14 #
14 #
15 # You should have received a copy of the GNU General Public License
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
17 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18
18
19 require 'redmine/scm/adapters/git_adapter'
19 require 'redmine/scm/adapters/git_adapter'
20
20
21 class Repository::Git < Repository
21 class Repository::Git < Repository
22 attr_protected :root_url
22 attr_protected :root_url
23 validates_presence_of :url
23 validates_presence_of :url
24
24
25 def self.human_attribute_name(attribute_key_name, *args)
25 def self.human_attribute_name(attribute_key_name, *args)
26 attr_name = attribute_key_name.to_s
26 attr_name = attribute_key_name.to_s
27 if attr_name == "url"
27 if attr_name == "url"
28 attr_name = "path_to_repository"
28 attr_name = "path_to_repository"
29 end
29 end
30 super(attr_name, *args)
30 super(attr_name, *args)
31 end
31 end
32
32
33 def self.scm_adapter_class
33 def self.scm_adapter_class
34 Redmine::Scm::Adapters::GitAdapter
34 Redmine::Scm::Adapters::GitAdapter
35 end
35 end
36
36
37 def self.scm_name
37 def self.scm_name
38 'Git'
38 'Git'
39 end
39 end
40
40
41 def report_last_commit
41 def report_last_commit
42 extra_report_last_commit
42 extra_report_last_commit
43 end
43 end
44
44
45 def extra_report_last_commit
45 def extra_report_last_commit
46 return false if extra_info.nil?
46 return false if extra_info.nil?
47 v = extra_info["extra_report_last_commit"]
47 v = extra_info["extra_report_last_commit"]
48 return false if v.nil?
48 return false if v.nil?
49 v.to_s != '0'
49 v.to_s != '0'
50 end
50 end
51
51
52 def supports_directory_revisions?
52 def supports_directory_revisions?
53 true
53 true
54 end
54 end
55
55
56 def supports_revision_graph?
56 def supports_revision_graph?
57 true
57 true
58 end
58 end
59
59
60 def repo_log_encoding
60 def repo_log_encoding
61 'UTF-8'
61 'UTF-8'
62 end
62 end
63
63
64 # Returns the identifier for the given git changeset
64 # Returns the identifier for the given git changeset
65 def self.changeset_identifier(changeset)
65 def self.changeset_identifier(changeset)
66 changeset.scmid
66 changeset.scmid
67 end
67 end
68
68
69 # Returns the readable identifier for the given git changeset
69 # Returns the readable identifier for the given git changeset
70 def self.format_changeset_identifier(changeset)
70 def self.format_changeset_identifier(changeset)
71 changeset.revision[0, 8]
71 changeset.revision[0, 8]
72 end
72 end
73
73
74 def branches
74 def branches
75 scm.branches
75 scm.branches
76 end
76 end
77
77
78 def tags
78 def tags
79 scm.tags
79 scm.tags
80 end
80 end
81
81
82 def default_branch
82 def default_branch
83 scm.default_branch
83 scm.default_branch
84 rescue Exception => e
84 rescue Exception => e
85 logger.error "git: error during get default branch: #{e.message}"
85 logger.error "git: error during get default branch: #{e.message}"
86 nil
86 nil
87 end
87 end
88
88
89 def find_changeset_by_name(name)
89 def find_changeset_by_name(name)
90 return nil if name.nil? || name.empty?
90 return nil if name.nil? || name.empty?
91 e = changesets.find(:first, :conditions => ['revision = ?', name.to_s])
91 e = changesets.find(:first, :conditions => ['revision = ?', name.to_s])
92 return e if e
92 return e if e
93 changesets.find(:first, :conditions => ['scmid LIKE ?', "#{name}%"])
93 changesets.find(:first, :conditions => ['scmid LIKE ?', "#{name}%"])
94 end
94 end
95
95
96 def entries(path=nil, identifier=nil)
96 def entries(path=nil, identifier=nil)
97 scm.entries(path,
97 scm.entries(path,
98 identifier,
98 identifier,
99 options = {:report_last_commit => extra_report_last_commit})
99 options = {:report_last_commit => extra_report_last_commit})
100 end
100 end
101
101
102 # With SCMs that have a sequential commit numbering,
102 # With SCMs that have a sequential commit numbering,
103 # such as Subversion and Mercurial,
103 # such as Subversion and Mercurial,
104 # Redmine is able to be clever and only fetch changesets
104 # Redmine is able to be clever and only fetch changesets
105 # going forward from the most recent one it knows about.
105 # going forward from the most recent one it knows about.
106 #
106 #
107 # However, Git does not have a sequential commit numbering.
107 # However, Git does not have a sequential commit numbering.
108 #
108 #
109 # In order to fetch only newly added revisions,
109 # In order to fetch only newly added revisions,
110 # Redmine needs to parse revisions per branch.
110 # Redmine needs to parse revisions per branch.
111 # Branch "last_scmid" is for this requirement.
111 # Branch "last_scmid" is for this requirement.
112 #
112 #
113 # In Git and Mercurial, revisions are not in date order.
113 # In Git and Mercurial, revisions are not in date order.
114 # Redmine Mercurial fixed issues.
114 # Redmine Mercurial fixed issues.
115 # * Redmine Takes Too Long On Large Mercurial Repository
115 # * Redmine Takes Too Long On Large Mercurial Repository
116 # http://www.redmine.org/issues/3449
116 # http://www.redmine.org/issues/3449
117 # * Sorting for changesets might go wrong on Mercurial repos
117 # * Sorting for changesets might go wrong on Mercurial repos
118 # http://www.redmine.org/issues/3567
118 # http://www.redmine.org/issues/3567
119 #
119 #
120 # Database revision column is text, so Redmine can not sort by revision.
120 # Database revision column is text, so Redmine can not sort by revision.
121 # Mercurial has revision number, and revision number guarantees revision order.
121 # Mercurial has revision number, and revision number guarantees revision order.
122 # Redmine Mercurial model stored revisions ordered by database id to database.
122 # Redmine Mercurial model stored revisions ordered by database id to database.
123 # So, Redmine Mercurial model can use correct ordering revisions.
123 # So, Redmine Mercurial model can use correct ordering revisions.
124 #
124 #
125 # Redmine Mercurial adapter uses "hg log -r 0:tip --limit 10"
125 # Redmine Mercurial adapter uses "hg log -r 0:tip --limit 10"
126 # to get limited revisions from old to new.
126 # to get limited revisions from old to new.
127 # But, Git 1.7.3.4 does not support --reverse with -n or --skip.
127 # But, Git 1.7.3.4 does not support --reverse with -n or --skip.
128 #
128 #
129 # The repository can still be fully reloaded by calling #clear_changesets
129 # The repository can still be fully reloaded by calling #clear_changesets
130 # before fetching changesets (eg. for offline resync)
130 # before fetching changesets (eg. for offline resync)
131 def fetch_changesets
131 def fetch_changesets
132 scm_brs = branches
132 scm_brs = branches
133 return if scm_brs.nil? || scm_brs.empty?
133 return if scm_brs.nil? || scm_brs.empty?
134 h1 = extra_info || {}
134 h1 = extra_info || {}
135 h = h1.dup
135 h = h1.dup
136 h["branches"] ||= {}
136 h["branches"] ||= {}
137 h["db_consistent"] ||= {}
137 h["db_consistent"] ||= {}
138 if changesets.count == 0
138 if changesets.count == 0
139 h["db_consistent"]["ordering"] = 1
139 h["db_consistent"]["ordering"] = 1
140 merge_extra_info(h)
140 merge_extra_info(h)
141 self.save
141 self.save
142 elsif ! h["db_consistent"].has_key?("ordering")
142 elsif ! h["db_consistent"].has_key?("ordering")
143 h["db_consistent"]["ordering"] = 0
143 h["db_consistent"]["ordering"] = 0
144 merge_extra_info(h)
144 merge_extra_info(h)
145 self.save
145 self.save
146 end
146 end
147 save_revisions(h, scm_brs)
147 save_revisions(h, scm_brs)
148 end
148 end
149
149
150 def save_revisions(h, scm_brs)
150 def save_revisions(h, scm_brs)
151 # Remember what revisions we already processed (in any branches)
152 all_revisions = []
151 scm_brs.each do |br1|
153 scm_brs.each do |br1|
152 br = br1.to_s
154 br = br1.to_s
155 last_revision = nil
153 from_scmid = nil
156 from_scmid = nil
154 from_scmid = h["branches"][br]["last_scmid"] if h["branches"][br]
157 from_scmid = h["branches"][br]["last_scmid"] if h["branches"][br]
155 h["branches"][br] ||= {}
158 h["branches"][br] ||= {}
156 begin
159
157 cnt = 0
160 revisions = scm.revisions('', from_scmid, br, {:reverse => true})
158 last_rev_scmid = nil
161 next if revisions.blank?
159 scm.revisions('', from_scmid, br, {:reverse => true}) do |rev|
162
160 cnt += 1
163 # Remember the last commit id here, before we start removing revisions from the array.
161 db_rev = find_changeset_by_name(rev.revision)
164 # We'll do that for optimization, but it also means, that we may lose even all revisions.
162 if db_rev.nil?
165 last_revision = revisions.last
163 transaction do
166
164 db_saved_rev = save_revision(rev)
167 # remove revisions that we have already processed (possibly in other branches)
165 parents = {}
168 revisions.reject!{|r| all_revisions.include?(r.scmid)}
166 parents[db_saved_rev] = rev.parents unless rev.parents.nil?
169 # add revisions that we are to parse now to 'all processed revisions'
167 parents.each do |ch, chparents|
170 # (this is equivalent to a union, because the already-processed revisions were removed above)
168 ch.parents = chparents.collect{|rp| find_changeset_by_name(rp)}.compact
171 all_revisions += revisions.map{|r| r.scmid}
169 end
172
170 end
173 # Make the search for existing revisions in the database in a more efficient manner
171 end
174 # This is replacing the one-after-one queries.
172 last_rev_scmid = rev.scmid
175 # Find all revisions, that are in the database, and then remove them from the revision array.
173 if cnt > 100
176 # Then later we won't need any conditions for db existence.
174 cnt = 0
177 # Query for several revisions at once, and remove them from the revisions array, if they are there.
175 h["branches"][br]["last_scmid"] = last_rev_scmid
178 # Do this in chunks, to avoid eventual memory problems (in case of tens of thousands of commits).
176 merge_extra_info(h)
179 # If there are no revisions (because the original code's algorithm filtered them),
177 self.save
180 # then this part will be stepped over.
181 # We make queries, just if there is any revision.
182 limit = 100
183 offset = 0
184 revisions_copy = revisions.clone # revisions will change
185 while offset < revisions_copy.size
186 recent_changesets_slice = changesets.find(
187 :all,
188 :conditions => [
189 'scmid IN (?)',
190 revisions_copy.slice(offset, limit).map{|x| x.scmid}
191 ]
192 )
193 # Subtract revisions that redmine already knows about
194 recent_revisions = recent_changesets_slice.map{|c| c.scmid}
195 revisions.reject!{|r| recent_revisions.include?(r.scmid)}
196 offset += limit
197 end
198
199 revisions.each do |rev|
200 transaction do
201 # There is no search in the db for this revision, because above we ensured,
202 # that it's not in the db.
203 db_saved_rev = save_revision(rev)
204 parents = {}
205 parents[db_saved_rev] = rev.parents unless rev.parents.nil?
206 parents.each do |ch, chparents|
207 ch.parents = chparents.collect{|rp| find_changeset_by_name(rp)}.compact
178 end
208 end
209 # saving the last scmid was moved from here, because we won't come in here,
210 # if the revision was already added for another branch
179 end
211 end
180 unless last_rev_scmid.nil?
212 end
181 h["branches"][br]["last_scmid"] = last_rev_scmid
213
182 merge_extra_info(h)
214 # save the data about the last revision for this branch
183 self.save
215 unless last_revision.nil?
184 end
216 h["branches"][br]["last_scmid"] = last_revision.scmid
185 rescue Redmine::Scm::Adapters::CommandFailed => e
217 merge_extra_info(h)
186 logger.error("save revisions error: #{e.message}")
218 self.save
187 end
219 end
188 end
220 end
189 end
221 end
190 private :save_revisions
222 private :save_revisions
191
223
192 def save_revision(rev)
224 def save_revision(rev)
193 changeset = Changeset.new(
225 changeset = Changeset.new(
194 :repository => self,
226 :repository => self,
195 :revision => rev.identifier,
227 :revision => rev.identifier,
196 :scmid => rev.scmid,
228 :scmid => rev.scmid,
197 :committer => rev.author,
229 :committer => rev.author,
198 :committed_on => rev.time,
230 :committed_on => rev.time,
199 :comments => rev.message
231 :comments => rev.message
200 )
232 )
201 if changeset.save
233 if changeset.save
202 rev.paths.each do |file|
234 rev.paths.each do |file|
203 Change.create(
235 Change.create(
204 :changeset => changeset,
236 :changeset => changeset,
205 :action => file[:action],
237 :action => file[:action],
206 :path => file[:path])
238 :path => file[:path])
207 end
239 end
208 end
240 end
209 changeset
241 changeset
210 end
242 end
211 private :save_revision
243 private :save_revision
212
244
213 def heads_from_branches_hash
245 def heads_from_branches_hash
214 h1 = extra_info || {}
246 h1 = extra_info || {}
215 h = h1.dup
247 h = h1.dup
216 h["branches"] ||= {}
248 h["branches"] ||= {}
217 h['branches'].map{|br, hs| hs['last_scmid']}
249 h['branches'].map{|br, hs| hs['last_scmid']}
218 end
250 end
219
251
220 def latest_changesets(path,rev,limit=10)
252 def latest_changesets(path,rev,limit=10)
221 revisions = scm.revisions(path, nil, rev, :limit => limit, :all => false)
253 revisions = scm.revisions(path, nil, rev, :limit => limit, :all => false)
222 return [] if revisions.nil? || revisions.empty?
254 return [] if revisions.nil? || revisions.empty?
223
255
224 changesets.find(
256 changesets.find(
225 :all,
257 :all,
226 :conditions => [
258 :conditions => [
227 "scmid IN (?)",
259 "scmid IN (?)",
228 revisions.map!{|c| c.scmid}
260 revisions.map!{|c| c.scmid}
229 ],
261 ],
230 :order => 'committed_on DESC'
262 :order => 'committed_on DESC'
231 )
263 )
232 end
264 end
233 end
265 end
General Comments 0
You need to be logged in to leave comments. Login now