From c0120745617cbd50a920338cb83e751b9a26963e Mon Sep 17 00:00:00 2001 From: Keith Laban Date: Wed, 25 Jan 2017 12:08:22 -0500 Subject: [PATCH] LUCENE-7671 - Enhance UpgradeIndexMergePolicy with additional options --- .../index/TestBackwardsCompatibility.java | 66 +++++- .../index/index.6.0.0.singlesegment-cfs.zip | Bin 0 -> 4081 bytes .../index/index.6.0.0.singlesegment-nocfs.zip | Bin 0 -> 6457 bytes .../apache/lucene/index/IndexUpgrader.java | 12 +- .../lucene/index/UpgradeIndexMergePolicy.java | 189 ++++++++++++++---- .../index/TestUpgradeIndexMergePolicy.java | 1 - 6 files changed, 216 insertions(+), 52 deletions(-) create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/index/index.6.0.0.singlesegment-cfs.zip create mode 100644 lucene/backward-codecs/src/test/org/apache/lucene/index/index.6.0.0.singlesegment-nocfs.zip diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java index 73b7271d78e7..cabc648b2354 100644 --- a/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java +++ b/lucene/backward-codecs/src/test/org/apache/lucene/index/TestBackwardsCompatibility.java @@ -429,6 +429,8 @@ public void testCreateIndexWithDocValuesUpdates() throws Exception { // TODO: on 6.0.0 release, gen the single segment indices and add here: final static String[] oldSingleSegmentNames = { + "6.0.0.singlesegment-cfs", + "6.0.0.singlesegment-nocfs" }; static Map oldIndexDirs; @@ -456,8 +458,9 @@ public static void beforeClass() throws Exception { oldIndexDirs = new HashMap<>(); for (String name : names) { Path dir = createTempDir(name); - InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream("index." + name + ".zip"); - assertNotNull("Index name " + name + " not found", resource); + String nameOnDisk = "index." 
+ name + ".zip"; + InputStream resource = TestBackwardsCompatibility.class.getResourceAsStream(nameOnDisk); + assertNotNull("Index name " + nameOnDisk + " not found", resource); TestUtil.unzip(resource, dir); oldIndexDirs.put(name, newFSDirectory(dir)); } @@ -1281,14 +1284,18 @@ public void testNumericFields() throws Exception { } private int checkAllSegmentsUpgraded(Directory dir) throws IOException { + return checkAllSegmentsMatchVersion(dir, Version.LATEST); + } + + private int checkAllSegmentsMatchVersion(Directory dir, Version version) throws IOException { final SegmentInfos infos = SegmentInfos.readLatestCommit(dir); if (VERBOSE) { - System.out.println("checkAllSegmentsUpgraded: " + infos); + System.out.println("checkAllSegmentsMatchVersion: " + version + "-" + infos); } for (SegmentCommitInfo si : infos) { - assertEquals(Version.LATEST, si.info.getVersion()); + assertEquals(version, si.info.getVersion()); } - assertEquals(Version.LATEST, infos.getCommitLuceneVersion()); + assertEquals(version, infos.getCommitLuceneVersion()); return infos.size(); } @@ -1306,14 +1313,59 @@ public void testUpgradeOldIndex() throws Exception { System.out.println("testUpgradeOldIndex: index=" +name); } Directory dir = newDirectory(oldIndexDirs.get(name)); + + int numSegmentsBefore = SegmentInfos.readLatestCommit(dir).size(); + + newIndexUpgrader(dir).upgrade(Integer.MAX_VALUE); - newIndexUpgrader(dir).upgrade(); + assertEquals(numSegmentsBefore, checkAllSegmentsUpgraded(dir)); + + dir.close(); + } + } + + public void testUpgradeWithExcplicitUpgrades() throws Exception { + List names = new ArrayList<>(oldNames.length + oldSingleSegmentNames.length); + names.addAll(Arrays.asList(oldNames)); + names.addAll(Arrays.asList(oldSingleSegmentNames)); + for(String name : names) { + if (VERBOSE) { + System.out.println("testUpgradeWithExcplicitUpgrades: index=" +name); + } + Directory dir = newDirectory(oldIndexDirs.get(name)); + + SegmentInfos infosBefore = 
SegmentInfos.readLatestCommit(dir); + int numSegmentsBefore = infosBefore.size(); + Version versionBefore = infosBefore.getCommitLuceneVersion(); + + assertFalse("Excpected these segments to be an old version", versionBefore.equals(Version.LATEST)); + + UpgradeIndexMergePolicy uimp = new UpgradeIndexMergePolicy(NoMergePolicy.INSTANCE); + + uimp.setRequireExplicitUpgrades(true); + uimp.setIgnoreNewSegments(true); + + assertEquals(numSegmentsBefore, checkAllSegmentsMatchVersion(dir, versionBefore)); + + try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(uimp))) { + w.forceMerge(numSegmentsBefore); // Don't optimize just upgrade + } + + assertEquals(numSegmentsBefore, checkAllSegmentsMatchVersion(dir, versionBefore)); - checkAllSegmentsUpgraded(dir); + uimp.setUpgradeInProgress(true); // Turn on upgrades + try (IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())).setMergePolicy(uimp))) { + w.forceMerge(numSegmentsBefore); // Don't optimize just upgrade + } + + assertEquals(numSegmentsBefore, checkAllSegmentsUpgraded(dir)); + dir.close(); } } + + // Write a test that checks that the underlying policy gets delegated to?? public void testCommandLineArgs() throws Exception { diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/index.6.0.0.singlesegment-cfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.6.0.0.singlesegment-cfs.zip new file mode 100644 index 0000000000000000000000000000000000000000..e3eafae2bffaed8553a0f2fc615564587de2ab9d GIT binary patch literal 4081 zcma)C#HZL1=5 z5z^0Z0bS;p<64AVEV8rAM>&iF$XA~~@`1eF$X;(Ub{lI#?V}PN-jHsVM3Rv1mxpm? 
zxH#||2U8_4&W;K&DohE_Yf9HhQh?qd=qqj>wyq(1$tj)+%8&WCgUQNk^F~S2+D9#@ zaa@c~_%RSu3co4^{7F7-y_>4F4EbPn{&&y%?_Tu>V|f3>>#k6+wTl4m*!(Vzm8l3S zG&IiQqin5Rq9m{^T5kjtE|;RwY-tee;Sw(+X}dyWqt5JCxPGNR_9Gb&+*NZLSg{Tt zBLSHI63(lB=8gth069Orxv=FC`}J0p%{^@C7drniq*shw&We?x4W%fO8aj-jh0r{(|GKn z)p%K~PFW_t`pBRe$Y|oa_80xb4qC7e`DdXSYT+9mTp)r;=1h-5F4{1_V&ccaUO|YP^R<^4p|U ziR8KoRx+-eGM=R&D@_ZMS4~bMjoQry2*sRnO5E&C3xP(W9viih4o}?`MMh#mK`*dg z6cJZD7v@DR1bG+Myn?`1?)N*&-)uy$75!}~&Jn75R#<$D68 zr&+V!KaOA`&SZb6tt|#b!i*8~gYVa`C~#-UlTq6mldG6TJ2C~$`Od9sAN7Tfg-`2Y zoi{^Du%%4{d&fn0l0xdJm{uaSonKXo z-3YMMfJYS)p0h!o<{G@w7u4s>38hFIq;k(nu!jf|>C&=D+^OTY>;$`LheoKLs3tTC z2*T+Iay9C{66pcL&)`#9?xN|h4YoiQms^T;J(OLuYJw$=g22f6G|mZo@rn8I+LOEu zmC2a%m5ojXLd$>AI3-xO=S#q@8HB0Edtex(lkLn>O%N=VNzA`1& z_y#-KS!s8PK>w)E1Oi=|Qk98W${V&ivkYxHoxTNzWHO`B(i;>lzV)>2*%$%gd8NK9MgtoBNG!+r& z@8-T3+f9*%fTAIDqq$ZiPcVd`p7HN56$F?Dg&O&iCk&GuZ$nKrC=fmdfBP*7wj{En<;--R=5{uS#!k)P45A^m8wzrjF~NKN1eI0o>h zQ&qISC)57YI=xCN`Q6RfWA;i>)SW>W2e;UVuPsx3DpC*5%0NZ~yN=Ih|Hn1;<0O(_ zufq;*2AA$OZ3fc|!PAG}OAwoy2soiO(z<2_yQgONZEGLz<>d20+4)ku@;113vYoUk zc%BVr2L7jQ2%?Hhd{Z&`_QM+Mz63Fw<6spNfh|bs8Gj#sH|XB#JuUQy{OC%e!pzeN z{E=dabM=nnkbj-_C+nF)A$X%u<&vYm^gC>aQ>kbm6KTk ztvIcBo4&(Gi)nS+4B0i28@s!NrG}7HAVXmNwQLRTio)Xi6m0?52gbf_>P#}X>#zRc z7ni)?|T7m$ll(_<@YEwIdu>wSR4C97YcRk}l z6^Z=XBe$>F^EDs4;`uI_Q@k_Ri4hPbXsOBPblnh=*OGf0Pv=lUKT|<=eYo3$J1qB9 zUrCaxR9PD|m3AookbhE%PVdrUZoICi4ohAx-!*e~jGjV0$2AK+j6QumvqFUzAsO4S;BobLRNva8-8CgUyj`B@~G|*tzdKbR^?=Y(a;Ozi@+Qr^Pve^t2g)f z(@tLciTmJ*W~_`mMKPOaVS>@ny2ZY~jb>LX#AE-O8e*3WCHcYdIp)mdvJr;Ufin&J=rkp=WqZ&J1 z5$f&uz#Xz~iuhongez|+java77!Xz%m1Vu3u^;Ji4qjVHrgDKJu%exNdX1B61&4QP zkB>^6>LRy4M>RCdg}OY3CvZ(hzY4(z$RN39uZ-P${%TL}z_g2-s6%z9aN#Y9{w@7N zi7o18%u5?^7R`(LuQipgc5I6kE*6r%{?br#>X%-je#iS7!`%B@@d&b}jCgpT9_4f@ zCV_#HopQRXuU7~7$SqhTsgc{Mf5CxL4H(6}e|63KacZ9nN z7V6I&+vQ5?;+n+@E@{Lh_rZasw{i|tzJroDxndAnilv{FxMDK;;$(a>;Q_zhdZwMS zvk{z}6_QdoVN8iTe-!Ogy{KbJFhJ z&q?^#%#7A7Q7=@lOa?B^Y@bu5No$y~k5-eh9{zX{*Pe}ib9(;Ob?Nxs;d!IxHZ4($ 
zLZUtK-s3``#iRRYhY|OxY9#%S1Z%`^-3mpiouzuDhQ5|AA)fEX{d&=@ILl5_yfA>Q zi$?K>1%&$k(>?ch3E29N1TdkCN{fr^7E#E8vEJU^$k@#X1#JJ?E_^dWP8=9J7FSwa#(Px6ebBVaX6u z;*=rEzyryB;G&7~@#V@685&a|D>YWDIbkxP7ndG4y0J8QMNvjkzU*kYvEJ|^ojtau z!+erMj_1iL=cWjUi1%X~`@+FuR9ZYvQXD5SJbHg~HDS8G*wg2wE!x!~A3Z{=n8j<{ zDFQ|=b6zb9Cc!28-BOarlYy*EbFK}Fq<2vKmICIe49s#R!055f}@6g8<;SK z>;~-KvLpREtUfM&4;;PxObuHcV=V&R(_I2%4aJdgSg=7%aC~FqYm%&CX}5&q@zwzF zU+kiyq>;qtLcP*18tp~sp-}GtXkb{V>(&1+`8O7SByKr+5%-p@XWtB=3iOAirAl7 zd>0k_JL^Kr{yWyMwf<)oCGdCF&&vNVQNNtvpHb2DfM3q@pLl;4{GWMj41iyb;qQ19 Tlt06zxOkH;g0a8&3jp{xE%hS} literal 0 HcmV?d00001 diff --git a/lucene/backward-codecs/src/test/org/apache/lucene/index/index.6.0.0.singlesegment-nocfs.zip b/lucene/backward-codecs/src/test/org/apache/lucene/index/index.6.0.0.singlesegment-nocfs.zip new file mode 100644 index 0000000000000000000000000000000000000000..8c551d2f359f680fe9bb0e1f86d53ffe96f2967c GIT binary patch literal 6457 zcmbW5dpuMBAICTM%iL3Q%YANPYI4gp%Kd)NW!SX2B_z5q#tad&GII&tT3K{fJ8yw2o--+gp(sXi1BsZN!OKiD2fvVW;%@*Q)S{LmCIK*7D%g%IZ*Sc z*#QB(9wcuv^~LENYL@-MN_)g?6UyOih&7KSRl^WqcNx-G1~!LJ`O^eSV(x11H?+{B-yXFUkl)iN0)>D{oE3uX6|+1NDw?4ol>@(HJqOZ~t=HI-TlTC7O{-rySzDz0GUBPO*RX>2!I#i*Bg`-un zO6$xyh$p0_1249k{U38ef`4bu)l>R5Ee4yKt|ay+4F5o1aazrnC&^Qvq&ndDs=80wsKvB|v%p3A2zJ)Iy zi#b?IFnp5OQ}{M{(;cj^bQJ!mm>9mG_*-8fm_@Ra+@L$)I2QRVZ1{3 zFT6NH2?9mL@d_G}my`qcElZ`t!~@-*Z!tKV&>M!3pj5?LZa*t4YUA^Q@78T?PrjEm zCp;SZWvktr)r4xi$41X{3gsQW+;?j#%mQ`ro$yQ-med;8l`d|TwTs)+`4)D-#lYo( z;_&Ov0{M}VK7R2-9-M8$l4f~!>dtvdy62aJhhLH8p_1()lO2LLyPWdz-7NI{9aCO=o+P_f4&Dt&On+rfl)Zi8*UKy0(D+}T@Wp#Ek9a`=ajXN{cRAfKK3=;V{_d}lz(|Ni)M*W ze5E2~blYZVIayEz#=RvASG~*#{=O2}NGWR884d6{n{saCphAGaR$%~vn158QaP03q zfzIqQXH?rn*q>lMW0L9|iZ$fbNXsE%@182YR_LnqFtJvs*X(J!B}~>V>G6}$v$GO^ zc-}ZT#<|F$htJi{_ag615Dn8?{+>53dVJ~GQr9gD#(-wn3|zS6Lm^Gws_*8h&Yr$k zL(OxolQVB$yj1VR5yYD(ypzj zi>~dHRRIhl2CQy%W~XTeWhgL8a9>5u+q<4LgJOsTE_#ni(&M?2vYrcr=7W}D>=mIm z?=-3Cplbw=roy z$Cjvl*OJXQBY2dwS`8)G1~h9JuZ>FV|D4qxhf6>~98H(OQ1HtISPSKP2Qp6Jt%|~F 
znw(QI^`Yzh8pqY>{dGAEutjE|N4yk6 zr9OjQD18svEgTTi9cWGXct{|!Aip%SEBx8H=d$D5p_Dl5U?dXBpr(lnV&-Ms20|Gy z;UGj1;|@n$oRDHuGei)BByiwr@XRZ>Gr)XE;1UkUGi%Cj&DE(hur*C>^Mk0!#bCqb ztiSfEOyn(|&-c)rv%p_!Qkvm!N%p>B5--p=?^npcusklq3sK2qiU;%CF79eElYn|? zR+?6CM}*>D2tqG%6=KjxlQE5o;yEqX2a5*EC0|*b5qgYD!s7C{&i%ko;lNML+`A|6 zqJ)-Lz%~9-;@rTYntHc}dN(nZ*YaZSRgz4^s6=is^FU<^!@3WX{>qeinF`=OYj(Jx zs0tj8n>&%hk>pVBjK|m_!z-ghHEl4?5f&_bODXZ@nOQRSyEHw7-M49-Fz#(?lQWcj zl;aVU5^P{6MGmiwQ|B$pOXc#>i+O2#%2=LtiLGy;AGkj{zl|DJI6Oz-GSV`YZ7yU| z66cEU-8~eSc6ql0nk2XB5C+ja5EPWPi&|mMfW{$qnh_y8&1xY8nfuJ9*Q6}s7m%kC zatI2^C?4uK@2Aq1STv#S+(`!wOB?GB1wb31y~186)m`R%Z!b=B&7e>yd1qWmR2NdF zJDKX1m+MV-FK{DCHcNboQ&8TaE+?z1rlGEHqfx1;wWIG;Tw`8bzBdO^NeH#X1$AAL z+x~Sgb0QdtM-Nn7n}+Yha@X$jKm_wu_M=VXCme2+oa z6~rqqF6R;eLnfF(AimXt=xuq(4;_M5hkL_SPzU^eR~%RC_0(T>Wq5m#a6ynu*=OX=!T$Z$A6({9TSB*);7uG2gxr*E>(yGCgf3%h|0t$6t74Tbtd z|DPM$l;PU(FB`)Du*Pu9n+C6S?u{H4-I@nj;`>YYh8_t2KdoUJR|6Rm>XxnuU3@q% zR<=A#eD7YJ&Urz52$u;*!XQvKMpz4u&B0YjXbN94hLtkPCWiV*LR_tKgtVYqu2uw| zj%~Di+f1}jNR*^hG22}k4FTybFEh+C63uX7?Zvc~ViPTymSN*@Mk#wZ-x3loXv~U$ zHR2Smrw67muG^+|?o?(0P^ndaxy=adZ@YejgB3==j4I-M?J_wo?MWK;(eNB%JV4o zHvV&D-vtg6Wny*gj^X;8{8?IV<@MXP&ysz$Vx!A^sSf_=YuY|91C84fQSaaMK7BVg z_h$G^zSh!Y<)?=pL)&taC+pyk5+kHb~|tJXez~NhmtinpG{+OBfC16eb^Sg z!J}dN`U3Lzf>nF;+jF0FU3W3RP*Yb^jB#uvS7T{>W~ajHL<41F+X4~igS)a+vjhA~ zVOzRWI-H#flwB;B%AAxIuXcN}q%ow{C!RPz(^=$;zdjzVl^@O6TlM8rk$u#y+U*}> zA0@mgMsMCfZXfd4(c)$A0}u5BZq^3)D~G#2c$7ZLISw{1s@{tBtmk^{cj>O1O@&^| z5z>O)V+VLYv8t$Ak_;6&xRXEVL`5dbkDRv2S2Q)u@)f07t>`Y_UIuAX-%Sc!v8Sig zjhNn(PJVr83=X)kIpF-x0J#g?9QJaF@|KLmv3V` z-Rxn^7D=N7#q^os^s>XWFXorUJypBv6_k-+;Zz!bIosF!vb%zK(!aHb3`Ho$K(8OAGz1% zd2gx%9}U=k<4<{xSm_qO7iTjMZgMvxp}0|PmwX)WHT~i^D*W%fgF^{Fy_Fvw;LF4)+%W~i<2_HUw9Ux^J@70(2M#r5ngi|SC2*N z?mzHtvZ3ed3@zq)$k|jI`mp%)%*=4*yj;n&J}iFt<+t@OxKH1d!A_L*-Qid!2MeXE z2`n8MDe_9R>EXfZ3-Xu%Ol51j@VR($VF%D5-9<%2io@7M2U9NWWX#@EB#TMj?C|!e z`Rw36y~DHLmg|q!I|SquH94zX7JpMYdB^OCOV{{(b+B%^7{)EF=8oO2<$XuEdP=s? 
zI)^3(8(L3Tn@lGl)QD>QlDl|yEEm}FCERvqJ8+i|*j${fp=`OAA0%z(E0Qdr@&KLr zocfA%rgr?u6{moP9$#ne2!Xj+!zrDIr6tFN%A#Bz7%dp6iV~#ZY=#E6 zCz{+eSzq~Vx--IXqSb_gvj`RXqsgh3MX-cA!Bsk!-jmI^_2a{FgHI2i?(R6t;G;TTzd$DRm;>ikzO7YJO!y`I~PXoAG$@#nR zr$PXrbRAdkY)`-9OT5#ioy|CQ+X z?DD7uxl|YXZs6Q;dVO~9N1*(0;H?t=%)WaHc^&H6rKwL3iRzv2D2*=lXZc+ zjDsFp(QTAU(xcg^Rhh#9m;qWHzpZ}x^JB%Z!(rG6v`Wwczra<)rhxI=%33#qv$X&- zY-RknAs+N^=()I+uYZm7n177){)0t7!dlC+1~Oxn1&G{@vFPVdYgvPTvR0;18)MN= zKi0C!8CUIEorP?SML*J5%aQ^zAJ}6^)?$IK{Sy|@B^$%iclfo~1)jCozx(~h zu=EXbEtZS#XY7V^h`v0p#ZK^p=!*64%>1{ix|SC!0HUk00PmOel^IC56%D`#LIN78 J75Enj^grG3kU{_e literal 0 HcmV?d00001 diff --git a/lucene/core/src/java/org/apache/lucene/index/IndexUpgrader.java b/lucene/core/src/java/org/apache/lucene/index/IndexUpgrader.java index 00084c880f23..ce2ebc0f7b98 100644 --- a/lucene/core/src/java/org/apache/lucene/index/IndexUpgrader.java +++ b/lucene/core/src/java/org/apache/lucene/index/IndexUpgrader.java @@ -147,9 +147,13 @@ public IndexUpgrader(Directory dir, IndexWriterConfig iwc, boolean deletePriorCo this.iwc = iwc; this.deletePriorCommits = deletePriorCommits; } + + public void upgrade() throws IOException { + this.upgrade(1); + } /** Perform the upgrade. 
*/ - public void upgrade() throws IOException { + public void upgrade(int maxSegements) throws IOException { if (!DirectoryReader.indexExists(dir)) { throw new IndexNotFoundException(dir.toString()); } @@ -161,7 +165,9 @@ public void upgrade() throws IOException { } } - iwc.setMergePolicy(new UpgradeIndexMergePolicy(iwc.getMergePolicy())); + UpgradeIndexMergePolicy uimp = new UpgradeIndexMergePolicy(iwc.getMergePolicy()); + uimp.setIgnoreNewSegments(true); + iwc.setMergePolicy(uimp); iwc.setIndexDeletionPolicy(new KeepOnlyLastCommitDeletionPolicy()); try (final IndexWriter w = new IndexWriter(dir, iwc)) { @@ -169,7 +175,7 @@ public void upgrade() throws IOException { if (infoStream.isEnabled(LOG_PREFIX)) { infoStream.message(LOG_PREFIX, "Upgrading all pre-" + Version.LATEST + " segments of index directory '" + dir + "' to version " + Version.LATEST + "..."); } - w.forceMerge(1); + w.forceMerge(maxSegements); if (infoStream.isEnabled(LOG_PREFIX)) { infoStream.message(LOG_PREFIX, "All segments upgraded to version " + Version.LATEST); infoStream.message(LOG_PREFIX, "Enforcing commit to rewrite all index metadata..."); diff --git a/lucene/core/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java b/lucene/core/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java index 74cbc905695a..840a572f4c61 100644 --- a/lucene/core/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java +++ b/lucene/core/src/java/org/apache/lucene/index/UpgradeIndexMergePolicy.java @@ -20,8 +20,7 @@ import org.apache.lucene.util.Version; import java.io.IOException; -import java.util.ArrayList; -import java.util.List; +import java.util.Collections; import java.util.Map; import java.util.HashMap; @@ -29,8 +28,9 @@ * an index when calling {@link IndexWriter#forceMerge(int)}. * All other methods delegate to the base {@code MergePolicy} given to the constructor. 
* This allows for an as-cheap-as possible upgrade of an older index by only upgrading segments that - * are created by previous Lucene versions. forceMerge does no longer really merge; - * it is just used to "forceMerge" older segment versions away. + * are created by previous Lucene versions. forceMerge in part still delegates to the wrapped {@code MergePolicy}: + * it will ask the wrapped policy for segments to merge, and the leftover segments will be rewritten with the latest version + * (i.e. merged with themselves). *

In general one would use {@link IndexUpgrader}, but for a fully customizeable upgrade, * you can use this like any other {@code MergePolicy} and call {@link IndexWriter#forceMerge(int)}: *

@@ -40,6 +40,10 @@
   *  w.forceMerge(1);
   *  w.close();
   * 
+ *

The above example would result in a single segment in the latest version. However, consider this scenario: + * If there were 10 segments in the index and they all needed upgrading, calling w.forceMerge(10) would leave 10 segments + * in the index, each rewritten with the latest Lucene version. Calling w.forceMerge(5) would delegate to the wrapped merge policy + * to determine which segments should be merged together; the remaining segments would be upgraded (rewritten) if need be. *

Warning: This merge policy may reorder documents if the index was partially * upgraded before calling forceMerge (e.g., documents were added). If your application relies * on "monotonicity" of doc IDs (which means that the order in which the documents @@ -49,6 +53,11 @@ * @see IndexUpgrader */ public class UpgradeIndexMergePolicy extends MergePolicyWrapper { + + private int maxUpgradesAtATime = 5; + private volatile boolean upgradeInProgress = false; + private volatile boolean requireExplicitUpgrades = false; + private boolean ignoreNewSegments = false; /** Wrap the given {@link MergePolicy} and intercept forceMerge requests to * only upgrade segments written with previous Lucene versions. */ @@ -56,6 +65,51 @@ public UpgradeIndexMergePolicy(MergePolicy in) { super(in); } + /** + * Sets whether an explicit call to {@link #setUpgradeInProgress(boolean)} must + * be called before {@link #findForcedMerges(SegmentInfos, int, Map, IndexWriter)} in order for + * an upgrade to initiate. Otherwise every request for a force merge will trigger and upgrade investigation + * + * This option is recommended if using UpgradeIndexMergePolicy as the default merge policy and fine grained control + * over when an upgrade is initiated is required + * + * @param requireExplicitUpgrades whether or not setting upgrades in progress is required: Default false + */ + public void setRequireExplicitUpgrades(boolean requireExplicitUpgrades) { + this.requireExplicitUpgrades = requireExplicitUpgrades; + } + + /** + * Set whether or not it is ok for this merge policy to do an upgrade. This + * option needs to enabled before doing a force merge for an upgrade to initiate. 
+ * + * This option has no effect when {@code requireExplicitUpgrades} is disabled + * + * @param upgradeInProgress allow this policy to upgrade segments: Default false + */ + public void setUpgradeInProgress(boolean upgradeInProgress) { + this.upgradeInProgress = upgradeInProgress; + } + + /** + * How many segment upgrades should be committed for scheduling at a time. If more segments + * than {@code maxUpgradesAtATime} need to be upgraded, this merge policy relies on IndexWriter's cascaded + * requests to find segments to merge. Submitting a few segments at a time allows segments in need + * of an upgrade to remain candidates for a naturally triggered merge. + * + * @param maxUpgradesAtATime how many segment upgrades should be committed for scheduling at a time: Default 5 + */ + public void setMaxUpgradesAtATime(int maxUpgradesAtATime) { + this.maxUpgradesAtATime = maxUpgradesAtATime; + } + + /** + * @param ignoreNewSegments Whether or not this merge policy should ignore already upgraded segments when force merging: Default false + */ + public void setIgnoreNewSegments(boolean ignoreNewSegments) { + this.ignoreNewSegments = ignoreNewSegments; + } + /** Returns if the given segment should be upgraded.
The default implementation * will return {@code !Version.LATEST.equals(si.getVersion())}, * so all segments created with a different version number than this Lucene version will @@ -64,62 +118,115 @@ public UpgradeIndexMergePolicy(MergePolicy in) { protected boolean shouldUpgradeSegment(SegmentCommitInfo si) { return !Version.LATEST.equals(si.info.getVersion()); } - - @Override - public MergeSpecification findMerges(MergeTrigger mergeTrigger, SegmentInfos segmentInfos, IndexWriter writer) throws IOException { - return in.findMerges(null, segmentInfos, writer); - } @Override public MergeSpecification findForcedMerges(SegmentInfos segmentInfos, int maxSegmentCount, Map segmentsToMerge, IndexWriter writer) throws IOException { - // first find all old segments - final Map oldSegments = new HashMap<>(); - for (final SegmentCommitInfo si : segmentInfos) { - final Boolean v = segmentsToMerge.get(si); - if (v != null && shouldUpgradeSegment(si)) { - oldSegments.put(si, v); - } - } - if (verbose(writer)) { - message("findForcedMerges: segmentsToUpgrade=" + oldSegments, writer); - } - - if (oldSegments.isEmpty()) - return null; - - MergeSpecification spec = in.findForcedMerges(segmentInfos, maxSegmentCount, oldSegments, writer); + // Find segments to merge in directory, unless we are ignoring + // newer segments. If new segments are ignored, first old + // segments need to be discovered. + MergeSpecification spec = ignoreNewSegments ? 
null : in.findForcedMerges(segmentInfos, maxSegmentCount, segmentsToMerge, writer); - if (spec != null) { - // remove all segments that are in merge specification from oldSegments, - // the resulting set contains all segments that are left over - // and will be merged to one additional segment: - for (final OneMerge om : spec.merges) { - oldSegments.keySet().removeAll(om.segments); + if(upgradeInProgress || !requireExplicitUpgrades) { + + // first find all old segments + final Map oldSegments = findSegmentsNeedingUpgrade(segmentInfos, segmentsToMerge); + + if (verbose(writer)) { + message("findForcedMerges: segmentsToUpgrade=" + oldSegments, writer); + } + + if (oldSegments.isEmpty()) { + upgradeInProgress = false; // Nothing to upgrade + return spec; + } + + if(ignoreNewSegments) { + // Ask the wrapped spec now to do the merge with the old segments + spec = in.findForcedMerges(segmentInfos, maxSegmentCount, oldSegments, writer); + } + + if (spec != null) { + // remove all segments that are in merge specification from oldSegments, + // the resulting set contains all segments that are left over + // and will be rewritten + for (final OneMerge om : spec.merges) { + oldSegments.keySet().removeAll(om.segments); + } } + + // Add other segments missed by the wrapped merge policy to be upgraded + return maybeUpdateSpecAndUpgradeProgress(spec, oldSegments, segmentInfos, writer); + } + return spec; + } + + /** + * Updates the the merge spec with old segments needing upgrade. 
Also sets whether or not to the upgrade needs to continue (upgradeInProgress=false) + * + * @param spec the MergeSpecification to update + * @param oldSegments the segments needing upgrade + * @param segmentInfos all segment infos + * @param writer the index writer + * @return the possibly updated MergeSpecification + */ + private MergeSpecification maybeUpdateSpecAndUpgradeProgress(MergeSpecification spec, Map oldSegments, SegmentInfos segmentInfos, IndexWriter writer) { if (!oldSegments.isEmpty()) { if (verbose(writer)) { message("findForcedMerges: " + in.getClass().getSimpleName() + - " does not want to merge all old segments, merge remaining ones into new segment: " + oldSegments, writer); - } - final List newInfos = new ArrayList<>(); - for (final SegmentCommitInfo si : segmentInfos) { - if (oldSegments.containsKey(si)) { - newInfos.add(si); - } + " does not want to merge all old segments, rewrite remaining ones into upgraded segments: " + oldSegments, writer); } - // add the final merge + if (spec == null) { spec = new MergeSpecification(); } - spec.add(new OneMerge(newInfos)); + + final int numWrappedSpecMerges = spec.merges.size(); + + for (SegmentCommitInfo si: segmentInfos) { + + if(!oldSegments.containsKey(si)) { + continue; + } + + spec.add(new OneMerge(Collections.singletonList(si))); + + if((spec.merges.size() - numWrappedSpecMerges) == maxUpgradesAtATime) { + return spec; + } + + } + + // We found we have less than the max number but greater than 0 + if(spec.merges.size() > numWrappedSpecMerges) { + return spec; + } + } - + + // Only set this once there are 0 segments needing upgrading, because when we return a + // spec, IndexWriter may (silently!) 
reject that merge if some of the segments we asked + // to be merged were already being (naturally) merged: + upgradeInProgress = false; + return spec; } + private Map findSegmentsNeedingUpgrade(SegmentInfos segmentInfos, Map segmentsToMerge) { + final Map oldSegments = new HashMap<>(); + + for (final SegmentCommitInfo si : segmentInfos) { + final Boolean v = segmentsToMerge.get(si); + if (v != null && shouldUpgradeSegment(si)) { + oldSegments.put(si, v); + } + } + + return oldSegments; + } + private boolean verbose(IndexWriter writer) { return writer != null && writer.infoStream.isEnabled("UPGMP"); } diff --git a/lucene/core/src/test/org/apache/lucene/index/TestUpgradeIndexMergePolicy.java b/lucene/core/src/test/org/apache/lucene/index/TestUpgradeIndexMergePolicy.java index 0ab13b42defb..16fb577a4d03 100644 --- a/lucene/core/src/test/org/apache/lucene/index/TestUpgradeIndexMergePolicy.java +++ b/lucene/core/src/test/org/apache/lucene/index/TestUpgradeIndexMergePolicy.java @@ -16,7 +16,6 @@ */ package org.apache.lucene.index; - public class TestUpgradeIndexMergePolicy extends BaseMergePolicyTestCase { public MergePolicy mergePolicy() {