UPD [find duplicates]
Posted: 2019-12-06 17:52:22
				
				velocità di esecuzione migliorata.
I film devono essere in ordine di titolo formattato. 10.000 film con 1.300 duplicati verificati in 30 secondi.
Movies must be sorted by formatted title.
Improved execution speed.
10,000 movies with 1,300 duplicates checked in 30 seconds.
provate!
test it!
 
			I film devono essere in ordine di titolo formattato. 10.000 film con 1.300 duplicati verificati in 30 secondi.
Movies must be sorted by formatted title.
Improved execution speed.
10,000 movies with 1,300 duplicates checked in 30 seconds.
Code: Select all
(***************************************************
Ant Movie Catalog importation script
www.antp.be/software/moviecatalog/
[Infos]
Authors=Antoine Potten + fulvio53s03
Title=Find Duplicates
Description=Finds duplicate titles in the movie list
Site=
Language=ENG
Version=1.1
Requires=3.5
Comments=2019-12-07 from original ant's script, speed execution implemented by fulvio53s03 | catalog must be ordered by translated + original title.
License=GPL
GetInfo=0
RequiresMovies=1
[Options]
TitleToUse=3|1|1=Use original title|2=Use translated title|3=Check both titles
IgnoreCase=1|1|0=Case sensitive: Keep the uppercase/lowercase characters as different|1=Case insensitive: Consider "TITLE" as identical to "Title" or "title"
[Parameters]
***************************************************)
// Find Duplicates: compares each movie's title key with the one seen on the
// previous pass and records matching pairs in a CSV-style result file.
program FindDup;
uses
  StringUtils1;
var
  // Accumulates the 'old;...' / 'new;...' result lines that are saved to ResultPath.
  DupList: TStringList;
  // Value of the 'TitleToUse' script option (selects which title field(s) form the key).
  quale_campo: integer;
  // Data extracted from the current movie by estrai_dati.
  CurTitle, CurLabel, Curmedia, Cursize: string;
  // tipo_confronto: value of the 'IgnoreCase' option (1 = compare lower-cased titles).
  // ctr_giri: pass counter; 0 means the one-time initialisation has not run yet.
  tipo_confronto, ctr_giri: integer;
  // Old*: values remembered from the previous pass; ResultPath: output file name.
  // NOTE(review): NewTitle is not referenced anywhere in the visible code.
  OldLabel, OldMedia, OldSize, OldTitle, NewTitle, ResultPath: string;
  // Scratch strings holding the two result lines built for a duplicate pair.
  Old_Riga, New_Riga: string;
// Loads the comparison key and the descriptive fields of the current movie
// into the Cur* globals.
//
// The key (CurTitle) is chosen by quale_campo, following the 'TitleToUse'
// option labels declared in the script header:
//   1 = original title only
//   2 = translated title only
//   3 = both titles, joined as 'original | translated'
// When tipo_confronto = 1 (case-insensitive option) the key is lower-cased;
// otherwise it is left exactly as stored.
procedure estrai_dati;
begin
  if quale_campo = 1 then
    CurTitle := GetField(fieldOriginalTitle)
  else if quale_campo = 2 then
    CurTitle := GetField(fieldTranslatedTitle)
  else
    // 3 (and any unexpected value) falls back to the combined key so that
    // CurTitle never keeps a stale value from the previous movie.
    CurTitle := GetField(fieldOriginalTitle) + ' | ' + GetField(fieldTranslatedTitle);

  if tipo_confronto = 1 then
    CurTitle := AnsiLowerCase(CurTitle);   // otherwise keep the title's letter case untouched

  CurLabel := GetField(fieldMedia);
  CurSize  := GetField(fieldSize);
  CurMedia := GetField(fieldMediaType);
end;
// Main body.
// NOTE(review): this appears to be executed once per movie with the global
// variables persisting between executions (ctr_giri = 0 is used as the
// "first pass" marker) - confirm against the host application's behaviour.
//
// Each pass compares the current movie's title key with the key remembered
// from the previous pass; this only detects duplicates when equal keys are
// adjacent, i.e. when the movie list is sorted by that key.
begin
  // ---- one-time initialisation on the first pass ----
  if ctr_giri = 0 then
  begin
    if StringUtils1_Version < 3 then
    begin
      ShowMessage('File "stringutils1.pas" is too old, please download a new version of it');
      Error;
    end;
    if ResultPath = '' then
    begin
      ResultPath := 'f:\duplicates.csv';
      Input('Find Duplicates', 'Store results to:', ResultPath);
    end;
    DupList := TStringList.Create;
    tipo_confronto := GetOption('IgnoreCase');
    quale_campo := GetOption('TitleToUse');
    // Sentinel "previous title" so the first record has something to be compared with.
    oldTitle := 'mxptlk';
  end;

  ctr_giri := ctr_giri + 1;
  estrai_dati;

  if CurTitle = oldTitle then
  begin
    // One line per member of the pair, each listing its own data first and
    // the other record's data after it.
    Old_Riga := 'old;' + OldLabel + ';' + OldMedia + ';' + OldSize + ';' + CurLabel + ';' + CurMedia + ';' + CurSize + ';' + CurTitle;
    New_Riga := 'new;' + CurLabel + ';' + CurMedia + ';' + CurSize + ';' + OldLabel + ';' + OldMedia + ';' + OldSize + ';' + oldTitle;
    DupList.Add(Old_Riga);
    DupList.Add(New_Riga);
  end;

  // Remember the current movie for the comparison on the next pass.
  oldTitle := CurTitle;
  OldLabel := CurLabel;
  OldMedia := CurMedia;
  OldSize  := CurSize;

  // The file is rewritten on every pass, so it is complete after the last one.
  DupList.SaveToFile(ResultPath);
end.
