Going on with my work on kultvideo.it i found some pages (related to erotic movies, i think) that cannot be accessed without a precise positive answer in a form posted in html page.
I think it's ncessary to use POSTPAGE2 to access but I don't know how to use it (as you know, I'm a genius in copying from other people works but I'm a donkey if I need to resolve programmation's questions).
Thanks in advance.
(look for 'Emmanuelle' and choose the first movie in list to see what I'm saying and find //fulvio in Kultvideo's script below)
Code: Select all
(***************************************************
Ant Movie Catalog importation script
www.antp.be/software/moviecatalog/
[Infos]
Authors=Fulvio53s03 based on original by Penanders (2006)
Title=Kultvideo
Description=Script per Kultvideo.it
Site=http://www.kultvideo.it
Language=IT
Version=2 - 2. 9.2010
Requires=3.5.1
Comments=Kultvideo offre poche informazioni, su film davvero introvabili
License=This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. |
GetInfo=1
[Options]
***************************************************)
program KultVideo;
uses
StringUtils7552;
var
MovieName, Pagestr: string;
//TheMovieAddress: string;
const
SITE = 'http://www.kultvideo.it/';
SITE1 = 'http://www.kultvideo.it/';
// SITE1 = 'http://www.kultvideo.it/scheda.asp';
// http://www.kultvideo.it/articles/ArticleSheet.aspx?__langG=it-IT&aid=7244
// ../articles/ArticleSheet.aspx?__langG=it-IT&aid=7244
// -- Formatta la stringa cercando le prime lettere rendendole maiuscole
function PrimeMaiu(str: string): string;
begin
str := AnsiLowerCase(str);
str := AnsiMixedCase(str, ' -/');
Result := str;
end;
// ---
procedure AnalyzePage(Address: string);
var
Page: TStringList;
LineNr: integer;
BeginPos: integer;
forceHTTP11: boolean;
forceEncodeParams: boolean;
begin
Page := TStringList.Create;
Page.Text := GetPage(Address);
Page.Text := UTF8Decode(Page.Text);
Pagestr := Page.Text;
LineNr := FindLine('Risultati della ricerca per i termini', Page, 0);
if LineNr = -1 then
begin
ShowError('Spiacente, nessun film trovato');
end
else // Trovati film ! Nota: possono esserci + pagine -> non ancora gestito !
begin
// Crea la lista di film
forceHTTP11 := true;
forceEncodeParams := true;
PickTreeClear;
PickTreeAdd('Risultati ricerca per "' + UrlDecode(MovieName) + '":', '');
AddMoviesTitles(Page);
if PickTreeExec(Address) then
begin
// <form name="aspnetForm" method="POST" action="ArticleSheet.aspx?__langG=it-IT&aid=2610" onkeypress="javascript:return WebForm_FireDefaultButton(event, 'ctl00_btnSearch')" id="aspnetForm">
Page.Text := PostPage2(SITE, Address, content, BaseAddress, forceHTTP11, forceEncodeParams);
//fulvio <form name="fmAdultsOK" method="post" action="ArticleSheet.aspx">
<input type="hidden" name="aid" value='2610' />
<input type="hidden" name="adtst" id="adtst" value="1" />
</form>
//fulvio .
// <form action="search.php" method="post" enctype="application/x-www-form-urlencoded" name="search">
// <input name="search" type="text" size="16" maxlength="50">
// <input name="go" type="image" src="image/go.gif" usemap="go">
// </form>
//fulvio
// Page.Text := GetPage(Address); // Richiede la pagina del film
Page.Text := UTF8Decode(Page.Text);
Pagestr := Page.Text;
SetField(FieldUrl, Address);
//Page.savetofile('D:\Prova.txt');
AnalyzeMoviePage(Page); // Analizza la pagina del film
end;
end;
Page.Free;
end;
// ---
// Analisi ed estrazione dati dalla pagina del film
procedure AnalyzeMoviePage(Page: TStringList);
var
//Fine: Integer;
Line, Line2, Line3, Comm: string;
InitChar, EndChar, SaveNationYear: string;
LineNr: Integer;
BeginPos, EndPos: Integer;
Field: integer;
begin
//Debug
//Page.SaveToFile(PATHLOG+MovieName+'.film');
// data di estrazione dati
SetField(fieldDate, DateToStr(Date));
// Immagine
LineNr := FindLine('id="article_sheet_picture"', Page, 0);
//showmessage(intToStr(LineNR));
if LineNr>-1 then
begin
LineNr := LineNr + 1;
Line := Page.GetString(LineNr);
Line := TextBetween(Line, '<img src="../', '" width="');
if length(Line) > 0 then
begin
Line := SITE1 + Line;
setfield(fieldAudioFormat, Line);
GetPicture2(Line, GetField(FieldUrl));
end;
end;
// elimina caratteri x debug
// Cerca il titolo tradotto
InitChar := '<td align="left" valign="top" class="article_sheet_filmtitle">'; //tipo 1 http://www.kultvideo.it/articles/ArticleSheet.aspx?__langG=it-IT&aid=6821
BeginPos := Pos(InitChar, Pagestr);
Delete(Pagestr, 1, BeginPos - 1);
Line := '>' + TextBetween(Pagestr, InitChar, '<') + '<';
Line := TextBetween(Line, '>', '<');
HTMLRemoveTags(Line);
HTMLDecode(Line);
Line := Fulltrim(Line);
SetField(fieldTranslatedTitle, PrimeMaiu(Line) );
// SetField(fieldOriginalTitle, PrimeMaiu(Line) );
// Cerca il titolo originale
//<span class="article_sheet_subtitle">
//<span class="article_sheet_subtitle">
InitChar := '<span class="article_sheet_filmsubtitle">';
BeginPos := Pos(InitChar, Pagestr);
if BeginPos = 0 then
begin
InitChar := '<span class="article_sheet_subtitle">';
BeginPos := Pos(InitChar, Pagestr);
end;
Delete(Pagestr, 1, BeginPos - 1);
EndChar := '<';
LineNr := FindLine(InitChar, Page, 0);
Line := Page.GetString(LineNr);
InitChar := '>';
Line := InitChar + TextBetween(Line, '<span class="article_sheet_filmsubtitle">', EndChar) + EndChar;
Line := TextBetween(Line, '>', EndChar);
BeginPos := Pos(InitChar, Pagestr);
HTMLRemoveTags(Line);
HTMLDecode(Line);
Line := Fulltrim(Line);
Line := UTF8Decode(Line);
SetField(fieldOriginalTitle, PrimeMaiu(Line) );
Delete(Pagestr, 1, BeginPos - 1);
// Cerca nazionalità e anno
InitChar := '<td';
EndChar := '</td>';
Line := InitChar + Textbetween(Pagestr, InitChar, EndChar) + EndChar;
HTMLRemoveTags(Line);
HTMLDecode(Line);
Line := Fulltrim(Line); //'Francia (1987) - Colore'
SaveNationYear := Line;
Line := TextBefore(SaveNationYear, '(', '');
SetField(fieldCountry, PrimeMaiu(Trim(Line)));
// esempio: MovieName := TextBefore(MovieName, '[', '') + TextAfter(MovieName, ']');
Line := TextBetween(SaveNationYear, '(', ')');
SetField(fieldYear, Line);
// Cerca genere
InitChar := '<span class="article_sheet_datalabel">';
Line := Textbetween(Pagestr, InitChar, '</td>');
BeginPos := Pos(InitChar, Pagestr);
Delete(Pagestr, 1, BeginPos);
Line := stringreplace(Line, 'Genere:', '');
HTMLRemoveTags(Line);
HTMLDecode(Line);
Line := Fulltrim(Line);
SetField(fieldCategory, PrimeMaiu(Line));
// Cerca regia
InitChar := '<span class="article_sheet_datalabel">';
Line := Textbetween(Pagestr, InitChar, '</td>');
BeginPos := Pos(InitChar, Pagestr);
Delete(Pagestr, 1, BeginPos);
Line := stringreplace(Line, 'Regia:', '');
HTMLRemoveTags(Line);
HTMLDecode(Line);
Line := Fulltrim(Line);
SetField(fieldDirector, PrimeMaiu(Line));
// Cerca cast
InitChar := '<span class="article_sheet_datalabel">';
Line := Textbetween(Pagestr, InitChar, '</td>');
BeginPos := Pos(InitChar, Pagestr);
Delete(Pagestr, 1, BeginPos);
Line := stringreplace(Line, 'Cast:', '');
HTMLRemoveTags(Line);
HTMLDecode(Line);
Line := Fulltrim(Line);
SetField(fieldActors, PrimeMaiu(Line));
// Cerca distributore (al posto del produttore)
InitChar := '<span class="article_sheet_datalabel">';
Line := Textbetween(Pagestr, InitChar, '</td>');
BeginPos := Pos(InitChar, Pagestr);
Delete(Pagestr, 1, BeginPos);
// Line := stringreplace(Line, 'Cast:', '');
HTMLRemoveTags(Line);
HTMLDecode(Line);
Line := stringreplace(Line, crlf, '');
Line := stringreplace(Line, #09, '');
Line := Fulltrim(Line);
SetField(fieldProducer, Line);
// Cerca la durata
InitChar := '<span class="article_sheet_datalabel">Durata:'; //tipo 1 http://www.kultvideo.it/articles/ArticleSheet.aspx?__langG=it-IT&aid=6821
// InitChar := '<span class="article_sheet_datalabel">'; //tipo 2
Line := Textbetween(Pagestr, InitChar, '</td>');
BeginPos := Pos(InitChar, Pagestr);
Delete(Pagestr, 1, BeginPos);
Line := stringreplace(Line, 'Durata:', '');
HTMLRemoveTags(Line);
HTMLDecode(Line);
Line := Fulltrim(Line);
SetField(fieldLength, Line);
comm := '';
InitChar := 'Trama:</span>';
BeginPos := pos(Pagestr, InitChar);
Delete(Pagestr, 1, BeginPos);
Pagestr := Pagestr + InitChar;
Line := Textbetween(Pagestr, InitChar, '</td>');
HTMLRemoveTags(Line);
HTMLDecode(Line);
Line := stringreplace(Line, crlf, '');
Line := stringreplace(Line, #09, '');
Line := Fulltrim(Line);
SetField(fieldDescription, Line);
InitChar := '<span class="article_sheet_datalabel">';
Line := Textbetween(Pagestr, InitChar, '</td>');
BeginPos := Pos(InitChar, Pagestr);
Delete(Pagestr, 1, BeginPos);
HTMLRemoveTags(Line);
HTMLDecode(Line);
Line := stringreplace(Line, crlf, '');
Line := stringreplace(Line, #09, '');
Line := Fulltrim(Line);
// Comm := Comm + Line + crlf;
SetField(fieldComments, Comm);
end;
// ---
// Riempie la lista con i film trovati
procedure AddMoviesTitles(Page: TStringList);
var
LineNr, Linesup: Integer;
Line, Supporto: string;
MovieTitle, MovieAddress: string;
BeginPos, EndPos: Integer;
Pagina: TStringList;
begin
//TheMovieAddress := '*';
LineNr := 0;
LineNr := FindLine('<div class="artlist_artpicture">', Page, LineNr);
// LineSup := LineNr;
While LineNR <> -1 Do
Begin
// LineNr := 0;
If LineNR = -1 Then Break;
//Showmessage(intToStr(LineNr));
Line := Page.GetString(LineNr);
// Crea l'url per la pagina completa del film
LineNr := LineNr + 1;
Line := Page.GetString(LineNr);
//Showmessage(line);
MovieAddress := TextBetween(Line, '<a href="../', '"><img src="');
MovieAddress := SITE1 + MovieAddress;
//ShowMessage(MovieAddress);
Page.SetString(lineNR, ' ');
//Estrazione Tipo supporto
Linesup := FindLine('src="../img/entities/articletype/', Page, Linesup);
Supporto := Page.GetString(Linesup);
Linesup := LineSup + 1;
//Decodifica il supporto
Supporto := TextBetween(Supporto, 'articletype/', '"');
if supporto = '1_1a.jpg' then supporto := ' (dvd)';
if supporto = '2_1a.jpg' then supporto := ' (BluRay)';
if supporto = '4_1a.gif' then supporto := ' (VHS)';
if (supporto = '10_1a.jpg') or (supporto = '15_1a.jpg')or (supporto = '18_1a.jpg') then
supporto := ' (poster)';
//Estrazione Titolo film
LineNr := FindLine('<td class="artlist_arttitle">', Page, LineNr);
LineNr := LineNr + 1;
MovieTitle := Page.GetString(LineNr);
//ShowMessage(MovieTitle);
//Ripulisce il titolo
HTMLRemoveTags(MovieTitle);
HTMLDecode(MovieTitle);
MovieTitle := Fulltrim(MovieTitle);
HTMLRemoveTags(MovieTitle);
HTMLDecode(MovieTitle);
MovieTitle := Fulltrim(MovieTitle) + Supporto;
// Controlla se ci sono altre pagine di risultati
//LineNr := FindLine('Pagina Successiva',Page,0);
// if LineNr<>-1 then
// begin
// idx := idx+1;
// end;
//Line := Page.GetString(LineNr);
// Estrae l'URL della pagina successiva
//BeginPos := pos('href="', Line)+5;
//Delete( Line, 1, BeginPos);
//BeginPos := 1;
//EndPos := pos('"', Line);
//Line := SITE1 + copy(Line, BeginPos, endPos-BeginPos);
//ShowMessage(Line);
// Richiama la pagina successiva e la analizza
//Page.text := GetPage(Line);
//Page.Text := UTF8Decode(Page.Text);
//Pagestr := Page.Text;
// Debug
//Page.SaveToFile(PATHLOG+MovieName+'.res'+IntToStr(idx));
//Page.LoadFromFile(PATHLOG+MOVIE+'.res');
PickTreeAdd(MovieTitle, MovieAddress);
LineNr := FindLine('<div class="artlist_artpicture">', Page, LineNr);
end;
end;
// ----- main()
Var
TempVar: String;
begin
if CheckVersion(3,5,1) then
begin
MovieName := GetField(fieldtranslatedTitle);
if MovieName = '' then
MovieName := GetField(fieldTranslatedTitle);
if Input('KultVideo.it', 'Inserire il nome del film:', MovieName) then
begin
MovieName := UrlEncode(MovieName);
// TempVar:='http://www.kultvideo.it/cerca.asp?page=1&Genre=&key=&cerca='+UrlEncode(MovieName)+'&tipocerca=titolo&radiobutton=ALL';
TempVar:='http://www.kultvideo.it/search/Search.aspx?st=' + MovieName;
SetField(fieldURL, Tempvar); // Memorizza il campo URL
analyzepage(TempVar);
end;
end else
ShowMessage('This script requires a newer version of Ant Movie Catalog (at least the version 3.5.1)');
end.
I post my question here as I think someone else could be interested in learning how to use POSTPAGE2.
bye.